[PATCH RFC 3/5] drm/amdgpu: Add DRM cgroup support for AMD devices

2018-11-20 Thread Kenny Ho
Change-Id: Ib66c44ac1b1c367659e362a2fc05b6fbb3805876
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/Makefile |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  7 
 drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c | 37 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h | 19 +++
 include/drm/drmcgrp_vendors.h   |  1 +
 5 files changed, 67 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index 138cb787d27e..5cf8048f2d75 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -186,4 +186,7 @@ amdgpu-y += $(AMD_DISPLAY_FILES)
 
 endif
 
+#DRM cgroup controller
+amdgpu-y += amdgpu_drmcgrp.o
+
 obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 30bc345d6fdf..ad0373f83ed3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2645,6 +2646,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
 
+   /* TODO:docs */
+   if (drmcgrp_vendors[amd_drmcgrp_vendor_id] == NULL)
+   drmcgrp_register_vendor(&amd_drmcgrp_vendor, amd_drmcgrp_vendor_id);
+
+   drmcgrp_register_device(adev->ddev, amd_drmcgrp_vendor_id);
+
return 0;
 
 failed:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c
new file mode 100644
index ..ed8aac17769c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: MIT
+// Copyright 2018 Advanced Micro Devices, Inc.
+#include 
+#include 
+#include 
+#include "amdgpu_drmcgrp.h"
+
+struct cftype files[] = {
+   { } /* terminate */
+};
+
+struct cftype *drmcgrp_amd_get_cftypes(void)
+{
+   return files;
+}
+
+struct drmcgrp_device_resource *amd_drmcgrp_alloc_dev_resource(void)
+{
+   struct amd_drmcgrp_dev_resource *a_ddr;
+
+   a_ddr = kzalloc(sizeof(struct amd_drmcgrp_dev_resource), GFP_KERNEL);
+   if (!a_ddr)
+   return ERR_PTR(-ENOMEM);
+
+   return &a_ddr->ddr;
+}
+
+void amd_drmcgrp_free_dev_resource(struct drmcgrp_device_resource *ddr)
+{
+   kfree(ddr_amdddr(ddr));
+}
+
+struct drmcgrp_vendor amd_drmcgrp_vendor = {
+   .get_cftypes = drmcgrp_amd_get_cftypes,
+   .alloc_dev_resource = amd_drmcgrp_alloc_dev_resource,
+   .free_dev_resource = amd_drmcgrp_free_dev_resource,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h
new file mode 100644
index ..e2934b7a49f5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ */
+#ifndef _AMDGPU_DRMCGRP_H
+#define _AMDGPU_DRMCGRP_H
+
+#include 
+
+/* for AMD specific DRM resources */
+struct amd_drmcgrp_dev_resource {
+   struct drmcgrp_device_resource ddr;
+};
+
+static inline struct amd_drmcgrp_dev_resource *ddr_amdddr(struct 
drmcgrp_device_resource *ddr)
+{
+   return ddr ? container_of(ddr, struct amd_drmcgrp_dev_resource, ddr) : 
NULL;
+}
+
+#endif /* _AMDGPU_DRMCGRP_H */
diff --git a/include/drm/drmcgrp_vendors.h b/include/drm/drmcgrp_vendors.h
index b04d8649851b..6cfbf1825344 100644
--- a/include/drm/drmcgrp_vendors.h
+++ b/include/drm/drmcgrp_vendors.h
@@ -3,5 +3,6 @@
  */
 #if IS_ENABLED(CONFIG_CGROUP_DRM)
 
+DRMCGRP_VENDOR(amd)
 
 #endif
-- 
2.19.1



[PATCH RFC 5/5] drm/amdgpu: Add accounting of buffer object creation request via DRM cgroup

2018-11-20 Thread Kenny Ho
Account for the total size of buffer objects requested from amdgpu, by
buffer type, on a per-cgroup basis.

The x prefix in the control file name x.bo_requested.amd.stat signifies
that the interface is experimental.
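
For illustration, a sysadmin reading the resulting per-cgroup control
file might see something roughly like the following (a sketch: the file
name assumes the usual "drm." prefix added by the cgroup core, the
numbers are made up, and the layout follows the seq_printf calls in the
patch below):

  $ cat /sys/fs/cgroup/<cgroup>/drm.x.bo_requested.amd.stat
  ---
  card0:
    cpu: 0
    gtt: 8388608
    vram: 134217728
    gds: 0
    gws: 0
    oa: 0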

Change-Id: Ifb680c4bcf3652879a7a659510e25680c2465cf6
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c | 56 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 13 +
 include/uapi/drm/amdgpu_drm.h   | 24 ++---
 4 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c
index 853b77532428..e3d98ed01b79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c
@@ -7,6 +7,57 @@
 #include "amdgpu_ring.h"
 #include "amdgpu_drmcgrp.h"
 
+void amdgpu_drmcgrp_count_bo_req(struct task_struct *task, struct drm_device 
*dev,
+   u32 domain, unsigned long size)
+{
+   struct drmcgrp *drmcgrp = get_drmcgrp(task);
+   struct drmcgrp_device_resource *ddr;
+   struct drmcgrp *p;
+   struct amd_drmcgrp_dev_resource *a_ddr;
+int i;
+
+   if (drmcgrp == NULL)
+   return;
+
+   ddr = drmcgrp->dev_resources[dev->primary->index];
+
+   mutex_lock(&ddr->ddev->mutex);
+   for (p = drmcgrp; p != NULL; p = parent_drmcgrp(drmcgrp)) {
+   a_ddr = ddr_amdddr(p->dev_resources[dev->primary->index]);
+
+   for (i = 0; i < __MAX_AMDGPU_MEM_DOMAIN; i++)
+   if ( (1 << i) & domain)
+   a_ddr->bo_req_count[i] += size;
+   }
+   mutex_unlock(&ddr->ddev->mutex);
+}
+
+int amd_drmcgrp_bo_req_stat_read(struct seq_file *sf, void *v)
+{
+   struct drmcgrp *drmcgrp = css_drmcgrp(seq_css(sf));
+   struct drmcgrp_device_resource *ddr = NULL;
+   struct amd_drmcgrp_dev_resource *a_ddr = NULL;
+   int i, j;
+
+   seq_puts(sf, "---\n");
+   for (i = 0; i < MAX_DRM_DEV; i++) {
+   ddr = drmcgrp->dev_resources[i];
+
+   if (ddr == NULL || ddr->ddev->vid != amd_drmcgrp_vendor_id)
+   continue;
+
+   a_ddr = ddr_amdddr(ddr);
+
+   seq_printf(sf, "card%d:\n", i);
+   for (j = 0; j < __MAX_AMDGPU_MEM_DOMAIN; j++)
+   seq_printf(sf, "  %s: %llu\n", 
amdgpu_mem_domain_names[j], a_ddr->bo_req_count[j]);
+   }
+
+   return 0;
+}
+
+
+
 void amdgpu_drmcgrp_count_cs(struct task_struct *task, struct drm_device *dev,
enum amdgpu_ring_type r_type)
 {
@@ -55,6 +106,11 @@ int amd_drmcgrp_cmd_submit_accounting_read(struct seq_file 
*sf, void *v)
 
 
 struct cftype files[] = {
+   {
+   .name = "x.bo_requested.amd.stat",
+   .seq_show = amd_drmcgrp_bo_req_stat_read,
+   .flags = CFTYPE_NOT_ON_ROOT,
+   },
{
.name = "x.cmd_submitted.amd.stat",
.seq_show = amd_drmcgrp_cmd_submit_accounting_read,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h
index f894a9a1059f..8b9d61e47dde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h
@@ -11,10 +11,13 @@
 struct amd_drmcgrp_dev_resource {
struct drmcgrp_device_resource ddr;
u64 cs_count[__MAX_AMDGPU_RING_TYPE];
+   u64 bo_req_count[__MAX_AMDGPU_MEM_DOMAIN];
 };
 
 void amdgpu_drmcgrp_count_cs(struct task_struct *task, struct drm_device *dev,
enum amdgpu_ring_type r_type);
+void amdgpu_drmcgrp_count_bo_req(struct task_struct *task, struct drm_device 
*dev,
+   u32 domain, unsigned long size);
 
 static inline struct amd_drmcgrp_dev_resource *ddr_amdddr(struct 
drmcgrp_device_resource *ddr)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7b3d1ebda9df..339e1d3edad8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -31,6 +31,17 @@
 #include 
 #include "amdgpu.h"
 #include "amdgpu_display.h"
+#include "amdgpu_drmcgrp.h"
+
+char const *amdgpu_mem_domain_names[] = {
+   [AMDGPU_MEM_DOMAIN_CPU] = "cpu",
+   [AMDGPU_MEM_DOMAIN_GTT] = "gtt",
+   [AMDGPU_MEM_DOMAIN_VRAM]= "vram",
+   [AMDGPU_MEM_DOMAIN_GDS] = "gds",
+   [AMDGPU_MEM_DOMAIN_GWS] = "gws",
+   [AMDGPU_MEM_DOMAIN_OA]  = "oa",
+   [__MAX_AMDGPU_MEM_DOMAIN]   = "_max"
+};
 
 void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 {
@@ -52,6 +63,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, 
unsigne

[PATCH RFC 2/5] cgroup: Add mechanism to register vendor specific DRM devices

2018-11-20 Thread Kenny Ho
Since many parts of the DRM subsystem have vendor-specific
implementations, we introduce a mechanism for vendors to register their
specific resources and control files with the DRM cgroup subsystem.  A
vendor registers itself with the DRM cgroup subsystem first, before
registering its individual DRM devices with the cgroup subsystem.

In addition to the cgroup_subsys_state that is common to all DRM
devices, a device-specific state is introduced and it is allocated
according to the vendor of the device.
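
A minimal sketch of how a vendor driver is expected to plug into this
mechanism (paraphrasing the amdgpu usage in a later patch; the "foo"
names and drm_dev are placeholders, a DRMCGRP_VENDOR(foo) entry in
drmcgrp_vendors.h is assumed, and error handling is omitted):

  static struct drmcgrp_vendor foo_drmcgrp_vendor = {
          .get_cftypes        = foo_get_cftypes,
          .alloc_dev_resource = foo_alloc_dev_resource,
          .free_dev_resource  = foo_free_dev_resource,
  };

  /* register the vendor once, then each of its DRM devices */
  if (drmcgrp_vendors[foo_drmcgrp_vendor_id] == NULL)
          drmcgrp_register_vendor(&foo_drmcgrp_vendor, foo_drmcgrp_vendor_id);
  drmcgrp_register_device(drm_dev, foo_drmcgrp_vendor_id);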

Change-Id: I908ee6975ea0585e4c30eafde4599f87094d8c65
Signed-off-by: Kenny Ho 
---
 include/drm/drm_cgroup.h  | 39 
 include/drm/drmcgrp_vendors.h |  7 +++
 include/linux/cgroup_drm.h| 26 +++
 kernel/cgroup/drm.c   | 84 +++
 4 files changed, 156 insertions(+)
 create mode 100644 include/drm/drm_cgroup.h
 create mode 100644 include/drm/drmcgrp_vendors.h

diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
new file mode 100644
index ..26cbea7059a6
--- /dev/null
+++ b/include/drm/drm_cgroup.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ */
+#ifndef __DRM_CGROUP_H__
+#define __DRM_CGROUP_H__
+
+#define DRMCGRP_VENDOR(_x) _x ## _drmcgrp_vendor_id,
+enum drmcgrp_vendor_id {
+#include 
+   DRMCGRP_VENDOR_COUNT,
+};
+#undef DRMCGRP_VENDOR
+
+#define DRMCGRP_VENDOR(_x) extern struct drmcgrp_vendor _x ## _drmcgrp_vendor;
+#include 
+#undef DRMCGRP_VENDOR
+
+
+
+#ifdef CONFIG_CGROUP_DRM
+
+extern struct drmcgrp_vendor *drmcgrp_vendors[];
+
+int drmcgrp_register_vendor(struct drmcgrp_vendor *vendor, enum 
drmcgrp_vendor_id id);
+int drmcgrp_register_device(struct drm_device *device, enum drmcgrp_vendor_id 
id);
+
+#else
+static int drmcgrp_register_vendor(struct drmcgrp_vendor *vendor, enum 
drmcgrp_vendor_id id)
+{
+   return 0;
+}
+
+static int drmcgrp_register_device(struct drm_device *device, enum 
drmcgrp_vendor_id id)
+{
+   return 0;
+}
+
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* __DRM_CGROUP_H__ */
diff --git a/include/drm/drmcgrp_vendors.h b/include/drm/drmcgrp_vendors.h
new file mode 100644
index ..b04d8649851b
--- /dev/null
+++ b/include/drm/drmcgrp_vendors.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ */
+#if IS_ENABLED(CONFIG_CGROUP_DRM)
+
+
+#endif
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 79ab38b0f46d..a776662d9593 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -6,10 +6,36 @@
 
 #ifdef CONFIG_CGROUP_DRM
 
+#include 
 #include 
+#include 
+#include 
+
+/* limit defined per the way drm_minor_alloc operates */
+#define MAX_DRM_DEV (64 * DRM_MINOR_RENDER)
+
+struct drmcgrp_device {
+   enum drmcgrp_vendor_id  vid;
+   struct drm_device   *dev;
+   struct mutexmutex;
+};
+
+/* vendor-common resource counting goes here */
+/* this struct should be included in the vendor specific resource */
+struct drmcgrp_device_resource {
+   struct drmcgrp_device   *ddev;
+};
+
+struct drmcgrp_vendor {
+   struct cftype *(*get_cftypes)(void);
+   struct drmcgrp_device_resource *(*alloc_dev_resource)(void);
+   void (*free_dev_resource)(struct drmcgrp_device_resource *dev_resource);
+};
+
 
 struct drmcgrp {
struct cgroup_subsys_state  css;
+   struct drmcgrp_device_resource  *dev_resources[MAX_DRM_DEV];
 };
 
 static inline struct drmcgrp *css_drmcgrp(struct cgroup_subsys_state *css)
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index d9e194b9aead..f9630cc389bc 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -1,8 +1,30 @@
 // SPDX-License-Identifier: MIT
 // Copyright 2018 Advanced Micro Devices, Inc.
+#include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
+#include 
+#include 
+
+/* generate an array of drm cgroup vendor pointers */
+#define DRMCGRP_VENDOR(_x) [_x ## _drmcgrp_vendor_id] = NULL,
+struct drmcgrp_vendor *drmcgrp_vendors[] = {
+#include 
+};
+#undef DRMCGRP_VENDOR
+EXPORT_SYMBOL(drmcgrp_vendors);
+
+static DEFINE_MUTEX(drmcgrp_mutex);
+
+/* indexed by drm_minor for access speed */
+static struct drmcgrp_device   *known_drmcgrp_devs[MAX_DRM_DEV];
+
+static int max_minor;
+
 
 static u64 drmcgrp_test_read(struct cgroup_subsys_state *css,
struct cftype *cft)
@@ -13,6 +35,12 @@ static u64 drmcgrp_test_read(struct cgroup_subsys_state *css,
 static void drmcgrp_css_free(struct cgroup_subsys_state *css)
 {
struct drmcgrp *drmcgrp = css_drmcgrp(css);
+   int i;
+
+   for (i = 0; i <= max_minor; i++) {
+   if (drmcgrp->dev_resources[i] != NULL)
+   
drmcgrp_vendors[known_drmcgrp_devs[i]->vid]->free_dev_resource(drmcgrp->dev_resources[i]);
+   }
 
kfree(css_drmcgrp(css));
 }
@@ -21,11 +49,27 @@ static struct cgro

[PATCH RFC 4/5] drm/amdgpu: Add accounting of command submission via DRM cgroup

2018-11-20 Thread Kenny Ho
Account for the number of commands submitted to amdgpu, by ring type, on
a per-cgroup basis, for the purpose of profiling/monitoring applications.

The x prefix in the control file name x.cmd_submitted.amd.stat signifies
that the interface is experimental.
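
As with the other experimental stat files, reading it from a cgroup
directory might look roughly like this (a sketch: the "drm." prefix is
assumed to be added by the cgroup core, and the ring names and counts
are illustrative):

  $ cat /sys/fs/cgroup/<cgroup>/drm.x.cmd_submitted.amd.stat
  ---
  card0:
    gfx: 1562
    compute: 88
    ...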

Change-Id: Ibc22e5bda600f54fe820fe0af5400ca348691550
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  5 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c | 54 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h |  5 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c| 15 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  5 +-
 5 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 663043c8f0f5..b448160aed89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -33,6 +33,7 @@
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
 #include "amdgpu_gem.h"
+#include "amdgpu_drmcgrp.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
  struct drm_amdgpu_cs_chunk_fence *data,
@@ -1275,6 +1276,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false;
+   struct amdgpu_ring *ring;
int i, r;
 
if (!adev->accel_working)
@@ -1317,6 +1319,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
if (r)
goto out;
 
+   ring = to_amdgpu_ring(parser.entity->rq->sched);
+   amdgpu_drmcgrp_count_cs(current, dev, ring->funcs->type);
+
+   r = amdgpu_cs_submit(&parser, cs);
 
 out:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c
index ed8aac17769c..853b77532428 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c
@@ -1,11 +1,65 @@
 // SPDX-License-Identifier: MIT
 // Copyright 2018 Advanced Micro Devices, Inc.
 #include 
+#include 
 #include 
 #include 
+#include "amdgpu_ring.h"
 #include "amdgpu_drmcgrp.h"
 
+void amdgpu_drmcgrp_count_cs(struct task_struct *task, struct drm_device *dev,
+   enum amdgpu_ring_type r_type)
+{
+   struct drmcgrp *drmcgrp = get_drmcgrp(task);
+   struct drmcgrp_device_resource *ddr;
+   struct drmcgrp *p;
+   struct amd_drmcgrp_dev_resource *a_ddr;
+
+   if (drmcgrp == NULL)
+   return;
+
+   ddr = drmcgrp->dev_resources[dev->primary->index];
+
+   mutex_lock(&ddr->ddev->mutex);
+   for (p = drmcgrp; p != NULL; p = parent_drmcgrp(drmcgrp)) {
+   a_ddr = ddr_amdddr(p->dev_resources[dev->primary->index]);
+
+   a_ddr->cs_count[r_type]++;
+   }
+   mutex_unlock(&ddr->ddev->mutex);
+}
+
+int amd_drmcgrp_cmd_submit_accounting_read(struct seq_file *sf, void *v)
+{
+   struct drmcgrp *drmcgrp = css_drmcgrp(seq_css(sf));
+   struct drmcgrp_device_resource *ddr = NULL;
+   struct amd_drmcgrp_dev_resource *a_ddr = NULL;
+   int i, j;
+
+   seq_puts(sf, "---\n");
+   for (i = 0; i < MAX_DRM_DEV; i++) {
+   ddr = drmcgrp->dev_resources[i];
+
+   if (ddr == NULL || ddr->ddev->vid != amd_drmcgrp_vendor_id)
+   continue;
+
+   a_ddr = ddr_amdddr(ddr);
+
+   seq_printf(sf, "card%d:\n", i);
+   for (j = 0; j < __MAX_AMDGPU_RING_TYPE; j++)
+   seq_printf(sf, "  %s: %llu\n", amdgpu_ring_names[j], 
a_ddr->cs_count[j]);
+   }
+
+   return 0;
+}
+
+
 struct cftype files[] = {
+   {
+   .name = "x.cmd_submitted.amd.stat",
+   .seq_show = amd_drmcgrp_cmd_submit_accounting_read,
+   .flags = CFTYPE_NOT_ON_ROOT,
+   },
{ } /* terminate */
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h
index e2934b7a49f5..f894a9a1059f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h
@@ -5,12 +5,17 @@
 #define _AMDGPU_DRMCGRP_H
 
 #include 
+#include "amdgpu_ring.h"
 
 /* for AMD specific DRM resources */
 struct amd_drmcgrp_dev_resource {
struct drmcgrp_device_resource ddr;
+   u64 cs_count[__MAX_AMDGPU_RING_TYPE];
 };
 
+void amdgpu_drmcgrp_count_cs(struct task_struct *task, struct drm_device *dev,
+   enum amdgpu_ring_type r_type);
+
 static inline struct amd_drmcgrp_dev_resource *ddr_amdddr(struct 
drmcgrp_device_resource *ddr)
 {
return ddr ? container_of(ddr, struct amd_drmcgrp_dev_resource, ddr) : 
NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers

[PATCH RFC 1/5] cgroup: Introduce cgroup for drm subsystem

2018-11-20 Thread Kenny Ho
Change-Id: I6830d3990f63f0c13abeba29b1d330cf28882831
Signed-off-by: Kenny Ho 
---
 include/linux/cgroup_drm.h| 32 
 include/linux/cgroup_subsys.h |  4 +++
 init/Kconfig  |  5 
 kernel/cgroup/Makefile|  1 +
 kernel/cgroup/drm.c   | 46 +++
 5 files changed, 88 insertions(+)
 create mode 100644 include/linux/cgroup_drm.h
 create mode 100644 kernel/cgroup/drm.c

diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
new file mode 100644
index ..79ab38b0f46d
--- /dev/null
+++ b/include/linux/cgroup_drm.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ */
+#ifndef _CGROUP_DRM_H
+#define _CGROUP_DRM_H
+
+#ifdef CONFIG_CGROUP_DRM
+
+#include 
+
+struct drmcgrp {
+   struct cgroup_subsys_state  css;
+};
+
+static inline struct drmcgrp *css_drmcgrp(struct cgroup_subsys_state *css)
+{
+   return css ? container_of(css, struct drmcgrp, css) : NULL;
+}
+
+static inline struct drmcgrp *get_drmcgrp(struct task_struct *task)
+{
+   return css_drmcgrp(task_get_css(task, drm_cgrp_id));
+}
+
+
+static inline struct drmcgrp *parent_drmcgrp(struct drmcgrp *cg)
+{
+   return css_drmcgrp(cg->css.parent);
+}
+
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* _CGROUP_DRM_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index acb77dcff3b4..ddedad809e8b 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -61,6 +61,10 @@ SUBSYS(pids)
 SUBSYS(rdma)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_DRM)
+SUBSYS(drm)
+#endif
+
 /*
  * The following subsystems are not supported on the default hierarchy.
  */
diff --git a/init/Kconfig b/init/Kconfig
index a4112e95724a..bee1e164443a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -836,6 +836,11 @@ config CGROUP_RDMA
  Attaching processes with active RDMA resources to the cgroup
  hierarchy is allowed even if can cross the hierarchy's limit.
 
+config CGROUP_DRM
+   bool "DRM controller (EXPERIMENTAL)"
+   help
+ Provides accounting and enforcement of resources in the DRM subsystem.
+
 config CGROUP_FREEZER
bool "Freezer controller"
help
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index bfcdae896122..6af14bd93050 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -4,5 +4,6 @@ obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
 obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
 obj-$(CONFIG_CGROUP_PIDS) += pids.o
 obj-$(CONFIG_CGROUP_RDMA) += rdma.o
+obj-$(CONFIG_CGROUP_DRM) += drm.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_DEBUG) += debug.o
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
new file mode 100644
index ..d9e194b9aead
--- /dev/null
+++ b/kernel/cgroup/drm.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: MIT
+// Copyright 2018 Advanced Micro Devices, Inc.
+#include 
+#include 
+#include 
+
+static u64 drmcgrp_test_read(struct cgroup_subsys_state *css,
+   struct cftype *cft)
+{
+   return 88;
+}
+
+static void drmcgrp_css_free(struct cgroup_subsys_state *css)
+{
+   struct drmcgrp *drmcgrp = css_drmcgrp(css);
+
+   kfree(css_drmcgrp(css));
+}
+
+static struct cgroup_subsys_state *
+drmcgrp_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+   struct drmcgrp *drmcgrp;
+
+   drmcgrp = kzalloc(sizeof(struct drmcgrp), GFP_KERNEL);
+   if (!drmcgrp)
+   return ERR_PTR(-ENOMEM);
+
+   return &drmcgrp->css;
+}
+
+struct cftype files[] = {
+   {
+   .name = "drm_test",
+   .read_u64 = drmcgrp_test_read,
+   },
+   { } /* terminate */
+};
+
+struct cgroup_subsys drm_cgrp_subsys = {
+   .css_alloc  = drmcgrp_css_alloc,
+   .css_free   = drmcgrp_css_free,
+   .early_init = false,
+   .legacy_cftypes = files,
+   .dfl_cftypes= files,
+};
-- 
2.19.1



Re: [PATCH RFC 5/5] drm/amdgpu: Add accounting of buffer object creation request via DRM cgroup

2018-11-27 Thread Kenny Ho
Hey Christian,

Sorry for the late reply, I missed this for some reason.

On Wed, Nov 21, 2018 at 5:00 AM Christian König
 wrote:
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index 370e9a5536ef..531726443104 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -72,6 +72,18 @@ extern "C" {
> >   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >   #define DRM_IOCTL_AMDGPU_SCHED  DRM_IOW(DRM_COMMAND_BASE + 
> > DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> >
> > +enum amdgpu_mem_domain {
> > + AMDGPU_MEM_DOMAIN_CPU,
> > + AMDGPU_MEM_DOMAIN_GTT,
> > + AMDGPU_MEM_DOMAIN_VRAM,
> > + AMDGPU_MEM_DOMAIN_GDS,
> > + AMDGPU_MEM_DOMAIN_GWS,
> > + AMDGPU_MEM_DOMAIN_OA,
> > + __MAX_AMDGPU_MEM_DOMAIN
> > +};
>
> Well that is a clear NAK since it duplicates the TTM defines. Please use
> that one instead and don't make this UAPI.
This is defined to help with the chunk of changes below.  The
AMDGPU_GEM_DOMAIN_* defines already exist, and this is similar to how TTM
has TTM_PL_* to help with the creation of TTM_PL_FLAG_*:
https://elixir.bootlin.com/linux/v4.20-rc4/source/include/drm/ttm/ttm_placement.h#L36

I don't disagree that there is duplication here, but it is pre-existing,
so if you can help clarify my confusion that would be much appreciated.
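
For reference, the TTM pattern being compared against looks roughly like
this (paraphrased from include/drm/ttm/ttm_placement.h around v4.20; not
part of this patch):

  #define TTM_PL_SYSTEM      0
  #define TTM_PL_TT          1
  #define TTM_PL_VRAM        2

  #define TTM_PL_FLAG_SYSTEM (1 << TTM_PL_SYSTEM)
  #define TTM_PL_FLAG_TT     (1 << TTM_PL_TT)
  #define TTM_PL_FLAG_VRAM   (1 << TTM_PL_VRAM)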

Regards,
Kenny

> > +
> > +extern char const *amdgpu_mem_domain_names[];
> > +
> >   /**
> >* DOC: memory domains
> >*
> > @@ -95,12 +107,12 @@ extern "C" {
> >* %AMDGPU_GEM_DOMAIN_OAOrdered append, used by 3D or Compute engines
> >* for appending data.
> >*/
> > -#define AMDGPU_GEM_DOMAIN_CPU0x1
> > -#define AMDGPU_GEM_DOMAIN_GTT0x2
> > -#define AMDGPU_GEM_DOMAIN_VRAM   0x4
> > -#define AMDGPU_GEM_DOMAIN_GDS0x8
> > -#define AMDGPU_GEM_DOMAIN_GWS0x10
> > -#define AMDGPU_GEM_DOMAIN_OA 0x20
> > +#define AMDGPU_GEM_DOMAIN_CPU(1 << AMDGPU_MEM_DOMAIN_CPU)
> > +#define AMDGPU_GEM_DOMAIN_GTT(1 << AMDGPU_MEM_DOMAIN_GTT)
> > +#define AMDGPU_GEM_DOMAIN_VRAM   (1 << AMDGPU_MEM_DOMAIN_VRAM)
> > +#define AMDGPU_GEM_DOMAIN_GDS(1 << AMDGPU_MEM_DOMAIN_GDS)
> > +#define AMDGPU_GEM_DOMAIN_GWS(1 << AMDGPU_MEM_DOMAIN_GWS)
> > +#define AMDGPU_GEM_DOMAIN_OA (1 << AMDGPU_MEM_DOMAIN_OA)
> >   #define AMDGPU_GEM_DOMAIN_MASK  (AMDGPU_GEM_DOMAIN_CPU | \
> >AMDGPU_GEM_DOMAIN_GTT | \
> >AMDGPU_GEM_DOMAIN_VRAM | \
>


Re: [PATCH RFC 5/5] drm/amdgpu: Add accounting of buffer object creation request via DRM cgroup

2018-11-27 Thread Kenny Ho
Ah I see.  Thank you for the clarification.

Regards,
Kenny
On Tue, Nov 27, 2018 at 3:31 PM Christian König
 wrote:
>
> Am 27.11.18 um 19:15 schrieb Kenny Ho:
> > Hey Christian,
> >
> > Sorry for the late reply, I missed this for some reason.
> >
> > On Wed, Nov 21, 2018 at 5:00 AM Christian König
> >  wrote:
> >>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> >>> index 370e9a5536ef..531726443104 100644
> >>> --- a/include/uapi/drm/amdgpu_drm.h
> >>> +++ b/include/uapi/drm/amdgpu_drm.h
> >>> @@ -72,6 +72,18 @@ extern "C" {
> >>>#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + 
> >>> DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
> >>>#define DRM_IOCTL_AMDGPU_SCHED  DRM_IOW(DRM_COMMAND_BASE + 
> >>> DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> >>>
> >>> +enum amdgpu_mem_domain {
> >>> + AMDGPU_MEM_DOMAIN_CPU,
> >>> + AMDGPU_MEM_DOMAIN_GTT,
> >>> + AMDGPU_MEM_DOMAIN_VRAM,
> >>> + AMDGPU_MEM_DOMAIN_GDS,
> >>> + AMDGPU_MEM_DOMAIN_GWS,
> >>> + AMDGPU_MEM_DOMAIN_OA,
> >>> + __MAX_AMDGPU_MEM_DOMAIN
> >>> +};
> >> Well that is a clear NAK since it duplicates the TTM defines. Please use
> >> that one instead and don't make this UAPI.
> > This is defined to help with the chunk of changes below.  The
> > AMDGPU_GEM_DOMAIN* already exists and this is similar to how TTM has
> > TTM_PL_* to help with the creation of TTM_PL_FLAG_*:
> > https://elixir.bootlin.com/linux/v4.20-rc4/source/include/drm/ttm/ttm_placement.h#L36
> >
> > I don't disagree that there is a duplication here but it's
> > pre-existing so if you can help clarify my confusion that would be
> > much appreciated.
>
> The AMDGPU_GEM_DOMAIN are masks which are used in the frontend IOCTL
> interface to create BOs.
>
> TTM defines the backend pools where the memory is then allocated from to
> fill the BOs.
>
> So you are mixing frontend and backend here.
>
> In other words for the whole cgroup interface you should not make a
> single change to amdgpu_drm.h or otherwise you are doing something wrong.
>
> Regards,
> Christian.
>
> >
> > Reards,
> > Kenny
> >
> >>> +
> >>> +extern char const *amdgpu_mem_domain_names[];
> >>> +
> >>>/**
> >>> * DOC: memory domains
> >>> *
> >>> @@ -95,12 +107,12 @@ extern "C" {
> >>> * %AMDGPU_GEM_DOMAIN_OAOrdered append, used by 3D or Compute 
> >>> engines
> >>> * for appending data.
> >>> */
> >>> -#define AMDGPU_GEM_DOMAIN_CPU0x1
> >>> -#define AMDGPU_GEM_DOMAIN_GTT0x2
> >>> -#define AMDGPU_GEM_DOMAIN_VRAM   0x4
> >>> -#define AMDGPU_GEM_DOMAIN_GDS0x8
> >>> -#define AMDGPU_GEM_DOMAIN_GWS0x10
> >>> -#define AMDGPU_GEM_DOMAIN_OA 0x20
> >>> +#define AMDGPU_GEM_DOMAIN_CPU(1 << AMDGPU_MEM_DOMAIN_CPU)
> >>> +#define AMDGPU_GEM_DOMAIN_GTT(1 << AMDGPU_MEM_DOMAIN_GTT)
> >>> +#define AMDGPU_GEM_DOMAIN_VRAM   (1 << 
> >>> AMDGPU_MEM_DOMAIN_VRAM)
> >>> +#define AMDGPU_GEM_DOMAIN_GDS(1 << AMDGPU_MEM_DOMAIN_GDS)
> >>> +#define AMDGPU_GEM_DOMAIN_GWS(1 << AMDGPU_MEM_DOMAIN_GWS)
> >>> +#define AMDGPU_GEM_DOMAIN_OA (1 << AMDGPU_MEM_DOMAIN_OA)
> >>>#define AMDGPU_GEM_DOMAIN_MASK  (AMDGPU_GEM_DOMAIN_CPU | \
> >>> AMDGPU_GEM_DOMAIN_GTT | \
> >>> AMDGPU_GEM_DOMAIN_VRAM | \


Re: [RFC PATCH v2 4/5] drm, cgroup: Add total GEM buffer allocation limit

2019-05-15 Thread Kenny Ho
On Wed, May 15, 2019 at 5:26 PM Welty, Brian  wrote:
> On 5/9/2019 2:04 PM, Kenny Ho wrote:
> > There are four control file types,
> > stats (ro) - display current measured values for a resource
> > max (rw) - limits for a resource
> > default (ro, root cgroup only) - default values for a resource
> > help (ro, root cgroup only) - help string for a resource
> >
> > Each file is multi-lined with one entry/line per drm device.
>
> Multi-line is correct for multiple devices, but I believe you need
> to use a KEY to denote device for both your set and get routines.
> I didn't see your set functions reading a key, or the get functions
> printing the key in output.
> cgroups-v2 conventions mention using KEY of major:minor, but I think
> you can use drm_minor as key?
Given this controller is specific to the drm kernel subsystem, which
uses the minor number to identify a drm device, I don't see a need to
complicate the interface further by having a major number and a key.  As
you can see in the examples below, the drm device minor corresponds to
the line number.  I am not sure how strict cgroup upstream is about the
convention, but I am hoping there is flexibility here to allow for what
I have implemented.  There are a couple of other things I have done that
are not described in the convention: 1) inclusion of a read-only *.help
file at the root cgroup, 2) use of a read-only (which I can potentially
make rw) *.default file instead of having default entries (since the
default can be different for different devices) inside the control files
(this way, resetting the cgroup values for all the drm devices can be
done by a simple 'cp'.)
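
A sketch of that "reset via cp" idea (paths are illustrative; <cgroup>
stands for the target cgroup directory, and it assumes the *.default and
*.max files share the same one-line-per-device format):

  # reset every device's limit in <cgroup> back to the defaults
  cp /sys/fs/cgroup/drm.buffer.total.default \
     /sys/fs/cgroup/<cgroup>/drm.buffer.total.max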

> > Usage examples:
> > // set limit for card1 to 1GB
> > sed -i '2s/.*/1073741824/' /sys/fs/cgroup//drm.buffer.total.max
> >
> > // set limit for card0 to 512MB
> > sed -i '1s/.*/536870912/' /sys/fs/cgroup//drm.buffer.total.max


> >  /** @file drm_gem.c
> > @@ -154,6 +156,9 @@ void drm_gem_private_object_init(struct drm_device *dev,
> >   obj->handle_count = 0;
> >   obj->size = size;
> >   drm_vma_node_reset(&obj->vma_node);
> > +
> > + obj->drmcgrp = get_drmcgrp(current);
> > + drmcgrp_chg_bo_alloc(obj->drmcgrp, dev, size);
>
> Why do the charging here?
> There is no backing store yet for the buffer, so this is really tracking 
> something akin to allowed virtual memory for GEM objects?
> Is this really useful for an administrator to control?
> Isn't the resource we want to control actually the physical backing store?
That's correct.  This is just the first level of control, since the
backing store can be backed by different types of memory.  I am in the
process of adding at least two more resources.  Stay tuned.  I am
doing the charge here to enforce the idea of "creator is deemed owner"
at a place where the code is shared by all (the init function.)

> > + while (i <= max_minor && limits != NULL) {
> > + sval =  strsep(, "\n");
> > + rc = kstrtoll(sval, 0, );
>
> Input should be "KEY VALUE", so KEY will determine device to apply this to.
> Also, per cgroups-v2 documentation of limits, I believe need to parse and 
> handle the special "max" input value.
>
> parse_resources() in rdma controller is example for both of above.
Please see my previous reply for the rationale of my hope to not need
a key.  I can certainly add handling of "max" and "default".


> > +void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
> > + size_t size)
>
> Shouldn't this return an error and be implemented with same semantics as the
> try_charge() functions of other controllers?
> Below will allow stats_total_allocated to overrun limits_total_allocated.
This is because I am charging the buffer at buffer init, which does not
fail, so the "try" check (drmcgrp_bo_can_allocate) is separate and placed
earlier, near the other conditions where gem object allocation may fail.
In other words, there are multiple reasons a gem allocation may fail
(the cgroup limit being one of them), and satisfying the cgroup limit
does not mean a charge is needed.  I can certainly combine the two
functions to have an additional try_charge semantic as well if that is
really needed.
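
A rough sketch of that split as it appears in the patch (paraphrased
from the series; error handling trimmed):

  /* amdgpu_bo_do_create(): the "try"-style check, where allocation can fail */
  if (!drmcgrp_bo_can_allocate(current, adev->ddev, size))
          return -ENOMEM;

  /* drm_gem_private_object_init(): the unconditional charge, since
   * object init itself cannot fail */
  obj->drmcgrp = get_drmcgrp(current);
  drmcgrp_chg_bo_alloc(obj->drmcgrp, dev, size);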

Regards,
Kenny

Re: [RFC PATCH v2 4/5] drm, cgroup: Add total GEM buffer allocation limit

2019-05-10 Thread Kenny Ho
On Fri, May 10, 2019 at 1:48 PM Koenig, Christian
 wrote:
> Well another question is why do we want to prevent that in the first place?
>
> I mean the worst thing that can happen is that we account a BO multiple
> times.
That's one of the problems.  The other one is a BO outliving the
lifetime of its cgroup: there's no good way to un-charge the usage
when the BO is freed, so the count won't be accurate.

I have looked into two possible solutions.  One is to prevent a cgroup
from being removed while there are BOs owned by the cgroup still alive
(similar to how cgroup removal will fail if it still has processes
attached to it.)  My concern here is the possibility of never being
able to remove a cgroup due to the lifetime of a BO (continuously
being shared and reused and never dying.)  Perhaps you can shed some
light on this possibility.

The other one is to keep track of all the buffers and migrate them to
the parent when a cgroup is removed.  My concern here is the performance
overhead of tracking all the buffers.

> And going into the same direction where is the code to handle an open
> device file descriptor which is send from one cgroup to another?
I looked into this before but I forgot what I found.  Perhaps folks
familiar with device cgroup can chime in.

Actually, just did another quick search right now.  Looks like the
access is enforced at the inode level (__devcgroup_check_permission)
so the fd sent to another cgroup that does not have access to the
device should still not have access.

Regards,
Kenny


> Regards,
> Christian.
>
> >
> > Regards,
> > Kenny
> >
> >>> On the other hand, if there are expectations for resource management
> >>> between containers, I would like to know who is the expected manager
> >>> and how does it fit into the concept of container (which enforce some
> >>> level of isolation.)  One possible manager may be the display server.
> >>> But as long as the display server is in a parent cgroup of the apps'
> >>> cgroup, the apps can still import handles from the display server
> >>> under the current implementation.  My understanding is that this is
> >>> most likely the case, with the display server simply sitting at the
> >>> default/root cgroup.  But I certainly want to hear more about other
> >>> use cases (for example, is running multiple display servers on a
> >>> single host a realistic possibility?  Are there people running
> >>> multiple display servers inside peer containers?  If so, how do they
> >>> coordinate resources?)
> >> We definitely have situations with multiple display servers running
> >> (just think of VR).
> >>
> >> I just can't say if they currently use cgroups in any way.
> >>
> >> Thanks,
> >> Christian.
> >>
> >>> I should probably summarize some of these into the commit message.
> >>>
> >>> Regards,
> >>> Kenny
> >>>
> >>>
> >>>
>  Christian.
> 
>

[RFC PATCH v2 1/5] cgroup: Introduce cgroup for drm subsystem

2019-05-09 Thread Kenny Ho
Change-Id: I6830d3990f63f0c13abeba29b1d330cf28882831
Signed-off-by: Kenny Ho 
---
 include/linux/cgroup_drm.h| 32 ++
 include/linux/cgroup_subsys.h |  4 
 init/Kconfig  |  5 +
 kernel/cgroup/Makefile|  1 +
 kernel/cgroup/drm.c   | 42 +++
 5 files changed, 84 insertions(+)
 create mode 100644 include/linux/cgroup_drm.h
 create mode 100644 kernel/cgroup/drm.c

diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
new file mode 100644
index ..121001be1230
--- /dev/null
+++ b/include/linux/cgroup_drm.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+#ifndef _CGROUP_DRM_H
+#define _CGROUP_DRM_H
+
+#ifdef CONFIG_CGROUP_DRM
+
+#include 
+
+struct drmcgrp {
+   struct cgroup_subsys_state  css;
+};
+
+static inline struct drmcgrp *css_drmcgrp(struct cgroup_subsys_state *css)
+{
+   return css ? container_of(css, struct drmcgrp, css) : NULL;
+}
+
+static inline struct drmcgrp *get_drmcgrp(struct task_struct *task)
+{
+   return css_drmcgrp(task_get_css(task, drm_cgrp_id));
+}
+
+
+static inline struct drmcgrp *parent_drmcgrp(struct drmcgrp *cg)
+{
+   return css_drmcgrp(cg->css.parent);
+}
+
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* _CGROUP_DRM_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index acb77dcff3b4..ddedad809e8b 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -61,6 +61,10 @@ SUBSYS(pids)
 SUBSYS(rdma)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_DRM)
+SUBSYS(drm)
+#endif
+
 /*
  * The following subsystems are not supported on the default hierarchy.
  */
diff --git a/init/Kconfig b/init/Kconfig
index d47cb77a220e..0b0f112eb23b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -839,6 +839,11 @@ config CGROUP_RDMA
  Attaching processes with active RDMA resources to the cgroup
  hierarchy is allowed even if can cross the hierarchy's limit.
 
+config CGROUP_DRM
+   bool "DRM controller (EXPERIMENTAL)"
+   help
+ Provides accounting and enforcement of resources in the DRM subsystem.
+
 config CGROUP_FREEZER
bool "Freezer controller"
help
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index bfcdae896122..6af14bd93050 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -4,5 +4,6 @@ obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
 obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
 obj-$(CONFIG_CGROUP_PIDS) += pids.o
 obj-$(CONFIG_CGROUP_RDMA) += rdma.o
+obj-$(CONFIG_CGROUP_DRM) += drm.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_DEBUG) += debug.o
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
new file mode 100644
index ..620c887d6d24
--- /dev/null
+++ b/kernel/cgroup/drm.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+// Copyright 2019 Advanced Micro Devices, Inc.
+#include 
+#include 
+#include 
+
+static struct drmcgrp *root_drmcgrp __read_mostly;
+
+static void drmcgrp_css_free(struct cgroup_subsys_state *css)
+{
+   struct drmcgrp *drmcgrp = css_drmcgrp(css);
+
+   kfree(css_drmcgrp(css));
+}
+
+static struct cgroup_subsys_state *
+drmcgrp_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+   struct drmcgrp *parent = css_drmcgrp(parent_css);
+   struct drmcgrp *drmcgrp;
+
+   drmcgrp = kzalloc(sizeof(struct drmcgrp), GFP_KERNEL);
+   if (!drmcgrp)
+   return ERR_PTR(-ENOMEM);
+
+   if (!parent)
+   root_drmcgrp = drmcgrp;
+
+   return &drmcgrp->css;
+}
+
+struct cftype files[] = {
+   { } /* terminate */
+};
+
+struct cgroup_subsys drm_cgrp_subsys = {
+   .css_alloc  = drmcgrp_css_alloc,
+   .css_free   = drmcgrp_css_free,
+   .early_init = false,
+   .legacy_cftypes = files,
+   .dfl_cftypes= files,
+};
-- 
2.21.0


[RFC PATCH v2 5/5] drm, cgroup: Add peak GEM buffer allocation limit

2019-05-09 Thread Kenny Ho
This new drmcgrp resource limits the largest GEM buffer that can be
allocated in a cgroup.
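
Usage sketch (the max file name follows the pattern of the total limit
in the previous patch and is assumed here, since the hunk adding it is
truncated below; <cgroup> stands for the target cgroup directory):

// set the largest single allocation allowed on card0 to 256MB
sed -i '1s/.*/268435456/' /sys/fs/cgroup/<cgroup>/drm.buffer.peak.max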

Change-Id: I0830d56775568e1cf215b56cc892d5e7945e9f25
Signed-off-by: Kenny Ho 
---
 include/linux/cgroup_drm.h |  2 ++
 kernel/cgroup/drm.c| 59 ++
 2 files changed, 61 insertions(+)

diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index fe14ba7bb1cf..57c07a148975 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -16,8 +16,10 @@
 struct drmcgrp_device_resource {
/* for per device stats */
s64 bo_stats_total_allocated;
+   size_t  bo_stats_peak_allocated;
 
s64 bo_limits_total_allocated;
+   size_t  bo_limits_peak_allocated;
 };
 
 struct drmcgrp {
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index bc3abff09113..5c7e1b8059ce 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -17,6 +17,7 @@ struct drmcgrp_device {
struct mutexmutex;
 
s64 bo_limits_total_allocated_default;
+   size_t  bo_limits_peak_allocated_default;
 };
 
 #define DRMCG_CTF_PRIV_SIZE 3
@@ -24,6 +25,7 @@ struct drmcgrp_device {
 
 enum drmcgrp_res_type {
DRMCGRP_TYPE_BO_TOTAL,
+   DRMCGRP_TYPE_BO_PEAK,
 };
 
 enum drmcgrp_file_type {
@@ -72,6 +74,9 @@ static inline int init_drmcgrp_single(struct drmcgrp 
*drmcgrp, int i)
if (known_drmcgrp_devs[i] != NULL) {
ddr->bo_limits_total_allocated =
  known_drmcgrp_devs[i]->bo_limits_total_allocated_default;
+
+   ddr->bo_limits_peak_allocated =
+ known_drmcgrp_devs[i]->bo_limits_peak_allocated_default;
}
 
return 0;
@@ -131,6 +136,9 @@ static inline void drmcgrp_print_stats(struct 
drmcgrp_device_resource *ddr,
case DRMCGRP_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", ddr->bo_stats_total_allocated);
break;
+   case DRMCGRP_TYPE_BO_PEAK:
+   seq_printf(sf, "%zu\n", ddr->bo_stats_peak_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -149,6 +157,9 @@ static inline void drmcgrp_print_limits(struct 
drmcgrp_device_resource *ddr,
case DRMCGRP_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", ddr->bo_limits_total_allocated);
break;
+   case DRMCGRP_TYPE_BO_PEAK:
+   seq_printf(sf, "%zu\n", ddr->bo_limits_peak_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -167,6 +178,9 @@ static inline void drmcgrp_print_default(struct 
drmcgrp_device *ddev,
case DRMCGRP_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", 
ddev->bo_limits_total_allocated_default);
break;
+   case DRMCGRP_TYPE_BO_PEAK:
+   seq_printf(sf, "%zu\n", ddev->bo_limits_peak_allocated_default);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -182,6 +196,11 @@ static inline void drmcgrp_print_help(int cardNum, struct 
seq_file *sf,
"Total amount of buffer allocation in bytes for card%d\n",
cardNum);
break;
+   case DRMCGRP_TYPE_BO_PEAK:
+   seq_printf(sf,
+   "Largest buffer allocation in bytes for card%d\n",
+   cardNum);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -254,6 +273,10 @@ ssize_t drmcgrp_bo_limit_write(struct kernfs_open_file 
*of, char *buf,
 if (val < 0) continue;
ddr->bo_limits_total_allocated = val;
break;
+   case DRMCGRP_TYPE_BO_PEAK:
+if (val < 0) continue;
+   ddr->bo_limits_peak_allocated = val;
+   break;
default:
break;
}
@@ -300,6 +323,33 @@ struct cftype files[] = {
.private = (DRMCGRP_TYPE_BO_TOTAL << DRMCG_CTF_PRIV_SIZE) |
DRMCGRP_FTYPE_MAX,
},
+   {
+   .name = "buffer.peak.stats",
+   .seq_show = drmcgrp_bo_show,
+   .private = (DRMCGRP_TYPE_BO_PEAK << DRMCG_CTF_PRIV_SIZE) |
+   DRMCGRP_FTYPE_STATS,
+   },
+   {
+   .name = "buffer.peak.default",
+   .seq_show = drmcgrp_bo_show,
+   .flags = CFTYPE_ONLY_ON_ROOT,
+   .private = (DRMCGRP_TYPE_BO_PEAK << DR

[RFC PATCH v2 0/5] new cgroup controller for gpu/drm subsystem

2019-05-09 Thread Kenny Ho
This is a follow up to the RFC I made last november to introduce a cgroup 
controller for the GPU/DRM subsystem [a].  The goal is to be able to provide 
resource management to GPU resources using things like container.  The cover 
letter from v1 is copied below for reference.

Usage examples:
// set limit for card1 to 1GB
sed -i '2s/.*/1073741824/' /sys/fs/cgroup//drm.buffer.total.max

// set limit for card0 to 512MB
sed -i '1s/.*/536870912/' /sys/fs/cgroup//drm.buffer.total.max


v2:
* Removed the vendoring concepts
* Add limit to total buffer allocation
* Add limit to the maximum size of a buffer allocation

TODO: process migration
TODO: documentations

[a]: https://lists.freedesktop.org/archives/dri-devel/2018-November/197106.html

v1: cover letter

The purpose of this patch series is to start a discussion for a generic cgroup
controller for the drm subsystem.  The design proposed here is a very early one.
We are hoping to engage the community as we develop the idea.


Backgrounds
==
Control Groups/cgroup provide a mechanism for aggregating/partitioning sets of
tasks, and all their future children, into hierarchical groups with specialized
behaviour, such as accounting/limiting the resources which processes in a cgroup
can access[1].  Weights, limits, protections, allocations are the main resource
distribution models.  Existing cgroup controllers includes cpu, memory, io,
rdma, and more.  cgroup is one of the foundational technologies that enables the
popular container application deployment and management method.

Direct Rendering Manager/drm contains code intended to support the needs of
complex graphics devices. Graphics drivers in the kernel may make use of DRM
functions to make tasks like memory management, interrupt handling and DMA
easier, and provide a uniform interface to applications.  The DRM has also
developed beyond traditional graphics applications to support compute/GPGPU
applications.


Motivations
=
As GPUs grow beyond the realm of desktop/workstation graphics into areas like
data center clusters and IoT, there is an increasing need to monitor and regulate
GPUs as a resource like cpu, memory and io.

Matt Roper from Intel began working on a similar idea in early 2018 [2] for the
purpose of managing GPU priority using the cgroup hierarchy.  While that
particular use case may not warrant a standalone drm cgroup controller, there
are other use cases where having one can be useful [3].  Monitoring GPU
resources such as VRAM and buffers, CU (compute unit [AMD's nomenclature])/EU
(execution unit [Intel's nomenclature]), and GPU job scheduling [4] can help
sysadmins get a better understanding of an application's usage profile.  Further
regulation of the aforementioned resources can also help sysadmins
optimize workload deployment on limited GPU resources.

With the increased importance of machine learning, data science and other
cloud-based applications, GPUs are already in production use in data centers
today [5,6,7].  Existing GPU resource management is very coarse-grained, however,
as sysadmins are only able to distribute workloads on a per-GPU basis [8].  An
alternative is to use GPU virtualization (with or without SR-IOV), but it
generally acts on the entire GPU instead of the specific resources in a GPU.
With a drm cgroup controller, we can enable alternate, fine-grain, sub-GPU
resource management (in addition to what may be available via GPU
virtualization.)

In addition to production use, the DRM cgroup can also help with testing
graphics application robustness by providing a means to artificially limit the
DRM resources available to the applications.

Challenges

While there is common infrastructure in DRM that is shared across many vendors
(the scheduler [4] for example), there are also aspects of DRM that are vendor
specific.  To accommodate this, we borrowed the mechanism used by cgroup to
handle different kinds of cgroup controllers.

Resources for DRM are also often device (GPU) specific instead of system
specific, and a system may contain more than one GPU.  For this, we borrowed
some of the ideas from the RDMA cgroup controller.

Approach
===
To experiment with the idea of a DRM cgroup, we would like to start with basic
accounting and statistics, then continue to iterate and add regulating
mechanisms into the driver.

[1] https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
[2] https://lists.freedesktop.org/archives/intel-gfx/2018-January/153156.html
[3] https://www.spinics.net/lists/cgroups/msg20720.html
[4] https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/scheduler
[5] https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
[6] 
https://blog.openshift.com/gpu-accelerated-sql-queries-with-postgresql-pg-strom-in-openshift-3-10/
[7] https://github.com/RadeonOpenCompute/k8s-device-plugin
[8] https://github.com/kubernetes/kubernetes/issues/52757

Kenny Ho (5):
  cgroup: Introduce cgroup for drm subsystem
  cgroup: Add mechanism

[RFC PATCH v2 3/5] drm/amdgpu: Register AMD devices for DRM cgroup

2019-05-09 Thread Kenny Ho
Change-Id: I3750fc657b956b52750a36cb303c54fa6a265b44
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index da7b4fe8ade3..2568fd730161 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -28,6 +28,7 @@
 #include 
 #include "amdgpu.h"
 #include 
+#include 
 #include "amdgpu_sched.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
@@ -97,6 +98,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
 
amdgpu_device_fini(adev);
 
+   drmcgrp_unregister_device(dev);
 done_free:
kfree(adev);
dev->dev_private = NULL;
@@ -141,6 +143,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned 
long flags)
struct amdgpu_device *adev;
int r, acpi_status;
 
+   drmcgrp_register_device(dev);
+
 #ifdef CONFIG_DRM_AMDGPU_SI
if (!amdgpu_si_support) {
switch (flags & AMD_ASIC_MASK) {
-- 
2.21.0


[RFC PATCH v2 4/5] drm, cgroup: Add total GEM buffer allocation limit

2019-05-09 Thread Kenny Ho
The drm resources being measured and limited here are GEM buffer
objects.  User applications allocate and free these buffers.  In
addition, a process can allocate a buffer and share it with another
process.  The consumer of a shared buffer can also outlive the
allocator of the buffer.

For the purpose of cgroup accounting and limiting, ownership of the
buffer is deemed to be the cgroup to which the allocating process
belongs.  There is one limit per drm device.

In order to prevent a buffer from outliving the cgroup that owns it, a
process is prevented from importing buffers that are not owned by the
process' cgroup or the ancestors of the process' cgroup.

For this resource, the control files are prefixed with drm.buffer.total.

There are four control file types,
stats (ro) - display current measured values for a resource
max (rw) - limits for a resource
default (ro, root cgroup only) - default values for a resource
help (ro, root cgroup only) - help string for a resource

Each file is multi-lined with one entry/line per drm device.

Usage examples:
// set limit for card1 to 1GB
sed -i '2s/.*/1073741824/' /sys/fs/cgroup//drm.buffer.total.max

// set limit for card0 to 512MB
sed -i '1s/.*/536870912/' /sys/fs/cgroup//drm.buffer.total.max
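
A matching read of the stats file might look like this (a sketch;
<cgroup> is the target cgroup directory, and the stats file name follows
the drm.buffer.total prefix described above):

// display current per-cgroup totals, one line per drm device
cat /sys/fs/cgroup/<cgroup>/drm.buffer.total.stats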

Change-Id: I4c249d06d45ec709d6481d4cbe87c5168545c5d0
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   4 +
 drivers/gpu/drm/drm_gem.c  |   7 +
 drivers/gpu/drm/drm_prime.c|   9 +
 include/drm/drm_cgroup.h   |  34 ++-
 include/drm/drm_gem.h  |  11 +
 include/linux/cgroup_drm.h |   3 +
 kernel/cgroup/drm.c| 280 +
 7 files changed, 346 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 93b2c5a48a71..b4c078b7ad63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
@@ -446,6 +447,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM;
 
+   if (!drmcgrp_bo_can_allocate(current, adev->ddev, size))
+   return -ENOMEM;
+
*bo_ptr = NULL;
 
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 6a80db077dc6..cbd49bf34dcf 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -37,10 +37,12 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include "drm_internal.h"
 
 /** @file drm_gem.c
@@ -154,6 +156,9 @@ void drm_gem_private_object_init(struct drm_device *dev,
obj->handle_count = 0;
obj->size = size;
drm_vma_node_reset(&obj->vma_node);
+
+   obj->drmcgrp = get_drmcgrp(current);
+   drmcgrp_chg_bo_alloc(obj->drmcgrp, dev, size);
 }
 EXPORT_SYMBOL(drm_gem_private_object_init);
 
@@ -804,6 +809,8 @@ drm_gem_object_release(struct drm_gem_object *obj)
if (obj->filp)
fput(obj->filp);
 
+   drmcgrp_unchg_bo_alloc(obj->drmcgrp, obj->dev, obj->size);
+
drm_gem_free_mmap_offset(obj);
 }
 EXPORT_SYMBOL(drm_gem_object_release);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 231e3f6d5f41..faed5611a1c6 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_internal.h"
 
@@ -794,6 +795,7 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
 {
struct dma_buf *dma_buf;
struct drm_gem_object *obj;
+   struct drmcgrp *drmcgrp = get_drmcgrp(current);
int ret;
 
dma_buf = dma_buf_get(prime_fd);
@@ -818,6 +820,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
goto out_unlock;
}
 
+   /* only allow bo from the same cgroup or its ancestor to be imported */
+   if (drmcgrp != NULL &&
+   !drmcgrp_is_self_or_ancestor(drmcgrp, obj->drmcgrp)) {
+   ret = -EACCES;
+   goto out_unlock;
+   }
+
if (obj->dma_buf) {
WARN_ON(obj->dma_buf != dma_buf);
} else {
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index ddb9eab64360..8711b7c5f7bf 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -4,12 +4,20 @@
 #ifndef __DRM_CGROUP_H__
 #define __DRM_CGROUP_H__
 
+#include 
+
 #ifdef CONFIG_CGROUP_DRM
 
 int drmcgrp_register_device(struct drm_device *device);
-
 int drmcgrp_unregister_device(struct drm_device *device);

[RFC PATCH v2 2/5] cgroup: Add mechanism to register DRM devices

2019-05-09 Thread Kenny Ho
Change-Id: I908ee6975ea0585e4c30eafde4599f87094d8c65
Signed-off-by: Kenny Ho 
---
 include/drm/drm_cgroup.h   |  24 
 include/linux/cgroup_drm.h |  10 
 kernel/cgroup/drm.c| 118 -
 3 files changed, 151 insertions(+), 1 deletion(-)
 create mode 100644 include/drm/drm_cgroup.h

diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
new file mode 100644
index ..ddb9eab64360
--- /dev/null
+++ b/include/drm/drm_cgroup.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+#ifndef __DRM_CGROUP_H__
+#define __DRM_CGROUP_H__
+
+#ifdef CONFIG_CGROUP_DRM
+
+int drmcgrp_register_device(struct drm_device *device);
+
+int drmcgrp_unregister_device(struct drm_device *device);
+
+#else
+static inline int drmcgrp_register_device(struct drm_device *device)
+{
+   return 0;
+}
+
+static inline int drmcgrp_unregister_device(struct drm_device *device)
+{
+   return 0;
+}
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* __DRM_CGROUP_H__ */
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 121001be1230..d7ccf434ca6b 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -6,10 +6,20 @@
 
 #ifdef CONFIG_CGROUP_DRM
 
+#include 
 #include 
+#include 
+
+/* limit defined per the way drm_minor_alloc operates */
+#define MAX_DRM_DEV (64 * DRM_MINOR_RENDER)
+
+struct drmcgrp_device_resource {
+   /* for per device stats */
+};
 
 struct drmcgrp {
struct cgroup_subsys_state  css;
+   struct drmcgrp_device_resource  *dev_resources[MAX_DRM_DEV];
 };
 
 static inline struct drmcgrp *css_drmcgrp(struct cgroup_subsys_state *css)
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 620c887d6d24..f9ef4bf042d8 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -1,16 +1,79 @@
 // SPDX-License-Identifier: MIT
 // Copyright 2019 Advanced Micro Devices, Inc.
+#include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
+#include 
+#include 
+
+static DEFINE_MUTEX(drmcgrp_mutex);
+
+struct drmcgrp_device {
+   struct drm_device   *dev;
+   struct mutexmutex;
+};
+
+/* indexed by drm_minor for access speed */
+static struct drmcgrp_device   *known_drmcgrp_devs[MAX_DRM_DEV];
+
+static int max_minor;
+
 
 static struct drmcgrp *root_drmcgrp __read_mostly;
 
 static void drmcgrp_css_free(struct cgroup_subsys_state *css)
 {
struct drmcgrp *drmcgrp = css_drmcgrp(css);
+   int i;
+
+   for (i = 0; i <= max_minor; i++) {
+   if (drmcgrp->dev_resources[i] != NULL)
+   kfree(drmcgrp->dev_resources[i]);
+   }
+
+   kfree(drmcgrp);
+}
+
+static inline int init_drmcgrp_single(struct drmcgrp *drmcgrp, int i)
+{
+   struct drmcgrp_device_resource *ddr = drmcgrp->dev_resources[i];
+
+   if (ddr == NULL) {
+   ddr = kzalloc(sizeof(struct drmcgrp_device_resource),
+   GFP_KERNEL);
+
+   if (!ddr)
+   return -ENOMEM;
+
+   drmcgrp->dev_resources[i] = ddr;
+   }
+
+   /* set defaults here */
+
+   return 0;
+}
+
+static inline int init_drmcgrp(struct drmcgrp *drmcgrp, struct drm_device *dev)
+{
+   int rc = 0;
+   int i;
+
+   if (dev != NULL) {
+   rc = init_drmcgrp_single(drmcgrp, dev->primary->index);
+   return rc;
+   }
+
+   for (i = 0; i <= max_minor; i++) {
+   rc = init_drmcgrp_single(drmcgrp, i);
+   if (rc)
+   return rc;
+   }
 
-   kfree(css_drmcgrp(css));
+   return 0;
 }
 
 static struct cgroup_subsys_state *
@@ -18,11 +81,18 @@ drmcgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 {
struct drmcgrp *parent = css_drmcgrp(parent_css);
struct drmcgrp *drmcgrp;
+   int rc;
 
drmcgrp = kzalloc(sizeof(struct drmcgrp), GFP_KERNEL);
if (!drmcgrp)
return ERR_PTR(-ENOMEM);
 
+   rc = init_drmcgrp(drmcgrp, NULL);
+   if (rc) {
+   drmcgrp_css_free(&drmcgrp->css);
+   return ERR_PTR(rc);
+   }
+
if (!parent)
root_drmcgrp = drmcgrp;
 
@@ -40,3 +110,49 @@ struct cgroup_subsys drm_cgrp_subsys = {
.legacy_cftypes = files,
.dfl_cftypes= files,
 };
+
+int drmcgrp_register_device(struct drm_device *dev)
+{
+   struct drmcgrp_device *ddev;
+   struct cgroup_subsys_state *pos;
+   struct drmcgrp *child;
+
+   ddev = kzalloc(sizeof(struct drmcgrp_device), GFP_KERNEL);
+   if (!ddev)
+   return -ENOMEM;
+
+   ddev->dev = dev;
+   mutex_init(&ddev->mutex);
+
+   mutex_lock(&drmcgrp_mutex);
+   known_drmcgrp_devs[dev->primary->index] = ddev;
+   max_minor = max(max_minor, dev->primary->index);
+   mutex_unlock(&drmcgrp_mutex);
+
+   /* init cgroups 

Re: [RFC PATCH v2 4/5] drm, cgroup: Add total GEM buffer allocation limit

2019-05-10 Thread Kenny Ho
On Fri, May 10, 2019 at 8:28 AM Christian König
 wrote:
>
> Am 09.05.19 um 23:04 schrieb Kenny Ho:
> > + /* only allow bo from the same cgroup or its ancestor to be imported 
> > */
> > + if (drmcgrp != NULL &&
> > + !drmcgrp_is_self_or_ancestor(drmcgrp, obj->drmcgrp)) {
> > + ret = -EACCES;
> > + goto out_unlock;
> > + }
> > +
>
> This will most likely go up in flames.
>
> If I'm not completely mistaken we already use
> drm_gem_prime_fd_to_handle() to exchange handles between different
> cgroups in current container usages.
This is something that I am interested in getting more details from
the broader community because the details affect how likely this will
go up in flames ;).  Note that this check does not block sharing of
handles from cgroup parent to children in the hierarchy, nor does it
block sharing of handles within a cgroup.
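
To make the semantics of that check concrete, here is a minimal sketch of
what an ancestor test along the lines of drmcgrp_is_self_or_ancestor()
could look like, assuming the parent_drmcgrp() helper from patch 1 of the
series; this is an illustration of the idea, not the exact patch code:

    /* Sketch: true if @relative is @self or one of @self's ancestors,
     * i.e. the buffer owner sits at or above the importer in the hierarchy.
     */
    static bool is_self_or_ancestor(struct drmcgrp *self,
                                    struct drmcgrp *relative)
    {
            for ( ; self != NULL; self = parent_drmcgrp(self))
                    if (self == relative)
                            return true;

            return false;
    }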

I am interested to find out, when existing apps share handles between
containers, if there are any expectations on resource management.
Since there is no drm cgroup for current container usage, I expect
the answer to be no.  In this case, the drm cgroup controller can be
disabled on its own (in the context of cgroup-v2's unified hierarchy),
or the process can remain at the root for the drm cgroup hierarchy (in
the context of cgroup-v1.)  If I understand the cgroup api correctly,
that means all processes would be part of the root cgroup as far as the
drm controller is concerned and this block will not come into effect.
I have verified that this is indeed the current default behaviour of a
container runtime (runc, which is used by docker, podman and others.)
The new drm cgroup controller is simply ignored and all processes
remain at the root of the hierarchy (since there are no other
cgroups.)  I plan to make contributions to runc (so folks can actually
use this feature with docker/podman/k8s, etc.) once things stabilize
on the kernel side.

On the other hand, if there are expectations for resource management
between containers, I would like to know who the expected manager is
and how it fits into the concept of a container (which enforces some
level of isolation).  One possible manager may be the display server.
But as long as the display server is in a parent cgroup of the apps'
cgroup, the apps can still import handles from the display server
under the current implementation.  My understanding is that this is
most likely the case, with the display server simply sitting at the
default/root cgroup.  But I certainly want to hear more about other
use cases (for example, is running multiple display servers on a
single host a realistic possibility?  Are there people running
multiple display servers inside peer containers?  If so, how do they
coordinate resources?)

I should probably summarize some of these into the commit message.

Regards,
Kenny



> Christian.
>
> >   if (obj->dma_buf) {
> >   WARN_ON(obj->dma_buf != dma_buf);
> >   } else {
> > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> > index ddb9eab64360..8711b7c5f7bf 100644
> > --- a/include/drm/drm_cgroup.h
> > +++ b/include/drm/drm_cgroup.h
> > @@ -4,12 +4,20 @@
> >   #ifndef __DRM_CGROUP_H__
> >   #define __DRM_CGROUP_H__
> >
> > +#include 
> > +
> >   #ifdef CONFIG_CGROUP_DRM
> >
> >   int drmcgrp_register_device(struct drm_device *device);
> > -
> >   int drmcgrp_unregister_device(struct drm_device *device);
> > -
> > +bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
> > + struct drmcgrp *relative);
> > +void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
> > + size_t size);
> > +void drmcgrp_unchg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device 
> > *dev,
> > + size_t size);
> > +bool drmcgrp_bo_can_allocate(struct task_struct *task, struct drm_device 
> > *dev,
> > + size_t size);
> >   #else
> >   static inline int drmcgrp_register_device(struct drm_device *device)
> >   {
> > @@ -20,5 +28,27 @@ static inline int drmcgrp_unregister_device(struct 
> > drm_device *device)
> >   {
> >   return 0;
> >   }
> > +
> > +static inline bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
> > + struct drmcgrp *relative)
> > +{
> > + return false;
> > +}
> > +
> > +static inline void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp,
> > + struct drm_device *dev, size_t size)
> > +{
> > +}
> > +
> > +static inline void drmcgrp_unchg_bo_alloc(struct drmcgrp *drmcgrp,
> > + struct drm_device *dev, size_t si

Re: [RFC PATCH v2 0/5] new cgroup controller for gpu/drm subsystem

2019-05-10 Thread Kenny Ho
On Fri, May 10, 2019 at 8:31 AM Christian König
 wrote:
>
> I think it is a good approach to try to add a global limit first and
> when that's working go ahead with limiting device specific resources.
What are some of the global drm resource limits/allocations that would
be useful to implement? I would be happy to dig into those.

Regards,
Kenny


> The only major issue I can see is on patch #4, see there for further
> details.
>
> Christian.
>
> Am 09.05.19 um 23:04 schrieb Kenny Ho:
> > This is a follow up to the RFC I made last november to introduce a cgroup 
> > controller for the GPU/DRM subsystem [a].  The goal is to be able to 
> > provide resource management to GPU resources using things like container.  
> > The cover letter from v1 is copied below for reference.
> >
> > Usage examples:
> > // set limit for card1 to 1GB
> > sed -i '2s/.*/1073741824/' /sys/fs/cgroup//drm.buffer.total.max
> >
> > // set limit for card0 to 512MB
> > sed -i '1s/.*/536870912/' /sys/fs/cgroup//drm.buffer.total.max
> >
> >
> > v2:
> > * Removed the vendoring concepts
> > * Add limit to total buffer allocation
> > * Add limit to the maximum size of a buffer allocation
> >
> > TODO: process migration
> > TODO: documentations
> >
> > [a]: 
> > https://lists.freedesktop.org/archives/dri-devel/2018-November/197106.html
> >
> > v1: cover letter
> >
> > The purpose of this patch series is to start a discussion for a generic 
> > cgroup
> > controller for the drm subsystem.  The design proposed here is a very early 
> > one.
> > We are hoping to engage the community as we develop the idea.
> >
> >
> > Backgrounds
> > ==
> > Control Groups/cgroup provide a mechanism for aggregating/partitioning sets 
> > of
> > tasks, and all their future children, into hierarchical groups with 
> > specialized
> > behaviour, such as accounting/limiting the resources which processes in a 
> > cgroup
> > can access[1].  Weights, limits, protections, allocations are the main 
> > resource
> > distribution models.  Existing cgroup controllers includes cpu, memory, io,
> > rdma, and more.  cgroup is one of the foundational technologies that 
> > enables the
> > popular container application deployment and management method.
> >
> > Direct Rendering Manager/drm contains code intended to support the needs of
> > complex graphics devices. Graphics drivers in the kernel may make use of DRM
> > functions to make tasks like memory management, interrupt handling and DMA
> > easier, and provide a uniform interface to applications.  The DRM has also
> > developed beyond traditional graphics applications to support compute/GPGPU
> > applications.
> >
> >
> > Motivations
> > =
> > As GPU grow beyond the realm of desktop/workstation graphics into areas like
> > data center clusters and IoT, there are increasing needs to monitor and 
> > regulate
> > GPU as a resource like cpu, memory and io.
> >
> > Matt Roper from Intel began working on similar idea in early 2018 [2] for 
> > the
> > purpose of managing GPU priority using the cgroup hierarchy.  While that
> > particular use case may not warrant a standalone drm cgroup controller, 
> > there
> > are other use cases where having one can be useful [3].  Monitoring GPU
> > resources such as VRAM and buffers, CU (compute unit [AMD's 
> > nomenclature])/EU
> > (execution unit [Intel's nomenclature]), GPU job scheduling [4] can help
> > sysadmins get a better understanding of the applications usage profile.  
> > Further
> > usage regulations of the aforementioned resources can also help sysadmins
> > optimize workload deployment on limited GPU resources.
> >
> > With the increased importance of machine learning, data science and other
> > cloud-based applications, GPUs are already in production use in data centers
> > today [5,6,7].  Existing GPU resource management is very coarse grain, 
> > however,
> > as sysadmins are only able to distribute workload on a per-GPU basis [8].  
> > An
> > alternative is to use GPU virtualization (with or without SRIOV) but it
> > generally acts on the entire GPU instead of the specific resources in a GPU.
> > With a drm cgroup controller, we can enable alternate, fine-grain, sub-GPU
> > resource management (in addition to what may be available via GPU
> > virtualization.)
> >
> > In addition to production use, the DRM cgroup can also help with testing
> > graphics application robustness by pr

Re: [RFC PATCH v2 4/5] drm, cgroup: Add total GEM buffer allocation limit

2019-05-10 Thread Kenny Ho
On Fri, May 10, 2019 at 11:08 AM Koenig, Christian
 wrote:
> Am 10.05.19 um 16:57 schrieb Kenny Ho:
> > On Fri, May 10, 2019 at 8:28 AM Christian König
> >  wrote:
> >> Am 09.05.19 um 23:04 schrieb Kenny Ho:
> So the drm cgroup container is separate to other cgroup containers?
In cgroup-v1, which is most widely deployed currently, all controllers
have their own hierarchy (see /sys/fs/cgroup/).  In cgroup-v2, the
hierarchy is unified but individual controllers can be disabled (I
believe, I am not super familiar with v2.)

> In other words as long as userspace doesn't change, this wouldn't have
> any effect?
As far as things like docker and podman are concerned, yes.  I am not
sure about the behaviour of others like lxc, lxd, etc. because I
haven't used those myself.

> Well that is unexpected cause then a processes would be in different
> groups for different controllers, but if that's really the case that
> would certainly work.
I believe this is a possibility for v1 and is why folks came up with
the unified hierarchy in v2 to solve some of the issues.
https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#issues-with-v1-and-rationales-for-v2

Regards,
Kenny

> > On the other hand, if there are expectations for resource management
> > between containers, I would like to know who is the expected manager
> > and how does it fit into the concept of container (which enforce some
> > level of isolation.)  One possible manager may be the display server.
> > But as long as the display server is in a parent cgroup of the apps'
> > cgroup, the apps can still import handles from the display server
> > under the current implementation.  My understanding is that this is
> > most likely the case, with the display server simply sitting at the
> > default/root cgroup.  But I certainly want to hear more about other
> > use cases (for example, is running multiple display servers on a
> > single host a realistic possibility?  Are there people running
> > multiple display servers inside peer containers?  If so, how do they
> > coordinate resources?)
>
> We definitely have situations with multiple display servers running
> (just think of VR).
>
> I just can't say if they currently use cgroups in any way.
>
> Thanks,
> Christian.
>
> >
> > I should probably summarize some of these into the commit message.
> >
> > Regards,
> > Kenny
> >
> >
> >
> >> Christian.
> >>
>

Re: [RFC PATCH v2 4/5] drm, cgroup: Add total GEM buffer allocation limit

2019-05-16 Thread Kenny Ho
On Thu, May 16, 2019 at 3:25 AM Christian König
 wrote:
> Am 16.05.19 um 09:16 schrieb Koenig, Christian:
> > Am 16.05.19 um 04:29 schrieb Kenny Ho:
> >> On Wed, May 15, 2019 at 5:26 PM Welty, Brian  wrote:
> >>> On 5/9/2019 2:04 PM, Kenny Ho wrote:
> >>>> Each file is multi-lined with one entry/line per drm device.
> >>> Multi-line is correct for multiple devices, but I believe you need
> >>> to use a KEY to denote device for both your set and get routines.
> >>> I didn't see your set functions reading a key, or the get functions
> >>> printing the key in output.
> >>> cgroups-v2 conventions mention using KEY of major:minor, but I think
> >>> you can use drm_minor as key?
> >> Given this controller is specific to the drm kernel subsystem which
> >> uses minor to identify drm device,
> > Wait a second, using the DRM minor is a good idea in the first place.
> Well that should have read "is not a good idea"..
>
> I have a test system with a Vega10 and a Vega20. Which device gets which
> minor is not stable, but rather defined by the scan order of the PCIe bus.
>
> Normally the scan order is always the same, but adding or removing
> devices or delaying things just a little bit during init is enough to
> change this.
>
> We need something like the Linux sysfs location or similar to have a
> stable implementation.

I get that, which is why I don't use minor to identify cards in user
space apps I wrote:
https://github.com/RadeonOpenCompute/k8s-device-plugin/blob/c2659c9d1d0713cad36fb5256681125121e6e32f/internal/pkg/amdgpu/amdgpu.go#L85

But within the kernel, I think my use of minor is consistent with the
rest of the drm subsystem.  I hope I don't need to reform the way the
drm subsystem uses minor in order to introduce a cgroup controller.
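
For what it is worth, the convention being discussed keys each line of a
flat-keyed file by the device's major:minor rather than by line position.
A rough sketch of emitting such an entry (the hard-coded 226, the DRM char
device major, is purely for illustration; real code would derive it from
the device):

    /* Sketch: one flat-keyed entry per drm device, keyed by major:minor. */
    static void print_keyed_stat(struct seq_file *sf,
                                 struct drm_device *dev, s64 value)
    {
            seq_printf(sf, "226:%d %lld\n", dev->primary->index, value);
    }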

Regards,
Kenny

Re: [RFC PATCH v2 4/5] drm, cgroup: Add total GEM buffer allocation limit

2019-05-16 Thread Kenny Ho
On Thu, May 16, 2019 at 10:10 AM Tejun Heo  wrote:
> I haven't gone through the patchset yet but some quick comments.
>
> On Wed, May 15, 2019 at 10:29:21PM -0400, Kenny Ho wrote:
> > Given this controller is specific to the drm kernel subsystem which
> > uses minor to identify drm device, I don't see a need to complicate
> > the interfaces more by having major and a key.  As you can see in the
> > examples below, the drm device minor corresponds to the line number.
> > I am not sure how strict cgroup upstream is about the convention but I
>
> We're pretty strict.
>
> > am hoping there are flexibility here to allow for what I have
> > implemented.  There are a couple of other things I have done that is
>
> So, please follow the interface conventions.  We can definitely add
> new ones but that would need functional reasons.
>
> > not described in the convention: 1) inclusion of read-only *.help file
> > at the root cgroup, 2) use read-only (which I can potentially make rw)
> > *.default file instead of having a default entries (since the default
> > can be different for different devices) inside the control files (this
> > way, the resetting of cgroup values for all the drm devices, can be
> > done by a simple 'cp'.)
>
> Again, please follow the existing conventions.  There's a lot more
> harm than good in every controller being creative in their own way.
> It's trivial to build convenience features in userspace.  Please do it
> there.
I can certainly remove the ro *.help file and leave the documentation
to Documentation/, but for the *.default I do have a functional reason
for it.  As far as I can tell from the convention, the default is per
cgroup and there is no way to describe a per-device default.  Although,
perhaps we are talking about two different kinds of defaults.  Anyway,
I can leave the discussion to a more detailed review.

Regards,
Kenny

Re: [RFC PATCH v2 4/5] drm, cgroup: Add total GEM buffer allocation limit

2019-05-16 Thread Kenny Ho
On Thu, May 16, 2019 at 10:12 AM Christian König
 wrote:
> Am 16.05.19 um 16:03 schrieb Kenny Ho:
> > On Thu, May 16, 2019 at 3:25 AM Christian König
> >  wrote:
> >> Am 16.05.19 um 09:16 schrieb Koenig, Christian:
> >> We need something like the Linux sysfs location or similar to have a
> >> stable implementation.
> > I get that, which is why I don't use minor to identify cards in user
> > space apps I wrote:
> > https://github.com/RadeonOpenCompute/k8s-device-plugin/blob/c2659c9d1d0713cad36fb5256681125121e6e32f/internal/pkg/amdgpu/amdgpu.go#L85
>
> Yeah, that is certainly a possibility.
>
> > But within the kernel, I think my use of minor is consistent with the
> > rest of the drm subsystem.  I hope I don't need to reform the way the
> > drm subsystem use minor in order to introduce a cgroup controller.
>
> Well I would try to avoid using the minor and at least look for
> alternatives. E.g. what does udev use to identify the devices for
> example? And IIRC we have something like a "device-name" in the kernel
> as well (what's printed in the logs).
>
> The minimum we need to do is get away from the minor=linenum approach,
> cause as Daniel pointed out the minor allocation is quite a mess and not
> necessary contiguous.

I noticed :) but it looks like there isn't much of a choice given what
Tejun/cgroup replied about convention.

Regards,
Kenny

Re: [RFC PATCH v3 07/11] drm, cgroup: Add TTM buffer allocation stats

2019-06-27 Thread Kenny Ho
On Thu, Jun 27, 2019 at 2:01 AM Daniel Vetter  wrote:
>
> btw reminds me: I guess it would be good to have a per-type .total
> read-only exposed, so that userspace has an idea of how much there is?
> ttm is trying to be agnostic to the allocator that's used to manage a
> memory type/resource, so doesn't even know that. But I think something we
> need to expose to admins, otherwise they can't meaningfully set limits.

I don't think I understand this bit, do you mean total across multiple
GPUs of the same mem type?  Or do you mean the total available per GPU
(or something else?)

Regards,
Kenny

Re: [RFC PATCH v3 04/11] drm, cgroup: Add total GEM buffer allocation limit

2019-06-27 Thread Kenny Ho
On Thu, Jun 27, 2019 at 1:43 AM Daniel Vetter  wrote:
>
> On Wed, Jun 26, 2019 at 06:41:32PM -0400, Kenny Ho wrote:
> > So without the sharing restriction and some kind of ownership
> > structure, we will have to migrate/change the owner of the buffer when
> > the cgroup that created the buffer die before the receiving cgroup(s)
> > and I am not sure how to do that properly at the moment.  1) Should
> > each cgroup keep track of all the buffers that belongs to it and
> > migrate?  (Is that efficient?)  2) which cgroup should be the new
> > owner (and therefore have the limit applied?)  Having the creator
> > being the owner is kind of natural, but let say the buffer is shared
> > with 5 other cgroups, which of these 5 cgroups should be the new owner
> > of the buffer?
>
> Different answers:
>
> - Do we care if we leak bos like this in a cgroup, if the cgroup
>   disappears before all the bo are cleaned up?
>
> - Just charge the bo to each cgroup it's in? Will be quite a bit more
>   tracking needed to get that done ...
That seems to be the approach memcg takes, but as shown by the lwn
link you sent me from the last rfc (talk from Roman Gushchin), that
approach is not problem free either.  And wouldn't this approach
disconnect resource management from the underlying resource one would
like to control?  For example, if you have 5 MB of memory, you can
have 5 users using 1 MB each.  But in the charge-everybody approach, a
1 MB usage shared 4 times will make it look like 5 MB is used.  So the
resource being controlled is no longer 'real' since the amount of
resource you have is now dynamic and depends on the amount of sharing
one does.

> - Also, there's the legacy way of sharing a bo, with the FLINK and
>   GEM_OPEN ioctls. We need to plug these holes too.
>
> Just feels like your current solution is technically well-justified, but
> it completely defeats the point of cgroups/containers and buffer sharing
> ...
Um... I am going to get a bit philosophical here and suggest that the
idea of sharing (especially uncontrolled sharing) is inherently at odds
with containment.  It's like, if everybody is special, no one is
special.  Perhaps an alternative is to make this configurable so that
people can allow sharing knowing the caveat?  And just to be clear,
the current solution allows for sharing, even between cgroups.

Regards,
Kenny

[RFC PATCH v3 04/11] drm, cgroup: Add total GEM buffer allocation limit

2019-06-26 Thread Kenny Ho
The drm resource being measured and limited here is the GEM buffer
objects.  User applications allocate and free these buffers.  In
addition, a process can allocate a buffer and share it with another
process.  The consumer of a shared buffer can also outlive the
allocator of the buffer.

For the purpose of cgroup accounting and limiting, ownership of the
buffer is deemed to be the cgroup to which the allocating process
belongs.  There is one cgroup limit per drm device.

In order to prevent the buffer outliving the cgroup that owns it, a
process is prevented from importing buffers that are not owned by the
process' cgroup or the ancestors of the process' cgroup.  In other
words, in order for a buffer to be shared between two cgroups, the
buffer must be created by the common ancestors of the cgroups.

drm.buffer.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Total GEM buffer allocation in bytes.

drm.buffer.default
A read-only flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Default limits on the total GEM buffer allocation in bytes.

drm.buffer.max
A read-write flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Per device limits on the total GEM buffer allocation in bytes.
This is a hard limit.  Attempts in allocating beyond the cgroup
limit will result in ENOMEM.  Shorthand understood by memparse
(such as k, m, g) can be used.

Set allocation limit for /dev/dri/card1 to 1GB
echo "226:1 1g" > drm.buffer.total.max

Set allocation limit for /dev/dri/card0 to 512MB
echo "226:0 512m" > drm.buffer.total.max

Change-Id: I4c249d06d45ec709d6481d4cbe87c5168545c5d0
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   4 +
 drivers/gpu/drm/drm_gem.c  |   8 +
 drivers/gpu/drm/drm_prime.c|   9 +
 include/drm/drm_cgroup.h   |  34 ++-
 include/drm/drm_gem.h  |  11 +
 include/linux/cgroup_drm.h |   2 +
 kernel/cgroup/drm.c| 321 +
 7 files changed, 387 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 93b2c5a48a71..b4c078b7ad63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
@@ -446,6 +447,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM;
 
+   if (!drmcgrp_bo_can_allocate(current, adev->ddev, size))
+   return -ENOMEM;
+
*bo_ptr = NULL;
 
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 6a80db077dc6..e20c1034bf2b 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -37,10 +37,12 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include "drm_internal.h"
 
 /** @file drm_gem.c
@@ -154,6 +156,9 @@ void drm_gem_private_object_init(struct drm_device *dev,
obj->handle_count = 0;
obj->size = size;
drm_vma_node_reset(&obj->vma_node);
+
+   obj->drmcgrp = get_drmcgrp(current);
+   drmcgrp_chg_bo_alloc(obj->drmcgrp, dev, size);
 }
 EXPORT_SYMBOL(drm_gem_private_object_init);
 
@@ -804,6 +809,9 @@ drm_gem_object_release(struct drm_gem_object *obj)
if (obj->filp)
fput(obj->filp);
 
+   drmcgrp_unchg_bo_alloc(obj->drmcgrp, obj->dev, obj->size);
+   put_drmcgrp(obj->drmcgrp);
+
drm_gem_free_mmap_offset(obj);
 }
 EXPORT_SYMBOL(drm_gem_object_release);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 231e3f6d5f41..eeb612116810 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_internal.h"
 
@@ -794,6 +795,7 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
 {
struct dma_buf *dma_buf;
struct drm_gem_object *obj;
+   struct drmcgrp *drmcgrp = drmcgrp_from(current);
int ret;
 
dma_buf = dma_buf_get(prime_fd);
@@ -818,6 +820,13 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
goto out_unlock;
}
 
+   /* only allow bo from the same cgroup or its ancestor to be imported */
+   if (drmcgrp != NULL &&
+  

[RFC PATCH v3 01/11] cgroup: Introduce cgroup for drm subsystem

2019-06-26 Thread Kenny Ho
Change-Id: I6830d3990f63f0c13abeba29b1d330cf28882831
Signed-off-by: Kenny Ho 
---
 include/linux/cgroup_drm.h| 76 +++
 include/linux/cgroup_subsys.h |  4 ++
 init/Kconfig  |  5 +++
 kernel/cgroup/Makefile|  1 +
 kernel/cgroup/drm.c   | 42 +++
 5 files changed, 128 insertions(+)
 create mode 100644 include/linux/cgroup_drm.h
 create mode 100644 kernel/cgroup/drm.c

diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
new file mode 100644
index ..9928e60037a5
--- /dev/null
+++ b/include/linux/cgroup_drm.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+#ifndef _CGROUP_DRM_H
+#define _CGROUP_DRM_H
+
+#ifdef CONFIG_CGROUP_DRM
+
+#include 
+
+struct drmcgrp {
+   struct cgroup_subsys_state  css;
+};
+
+static inline struct drmcgrp *css_drmcgrp(struct cgroup_subsys_state *css)
+{
+   return css ? container_of(css, struct drmcgrp, css) : NULL;
+}
+
+static inline struct drmcgrp *drmcgrp_from(struct task_struct *task)
+{
+   return css_drmcgrp(task_get_css(task, drm_cgrp_id));
+}
+
+static inline struct drmcgrp *get_drmcgrp(struct task_struct *task)
+{
+   struct cgroup_subsys_state *css = task_get_css(task, drm_cgrp_id);
+
+   if (css)
+   css_get(css);
+
+   return css_drmcgrp(css);
+}
+
+static inline void put_drmcgrp(struct drmcgrp *drmcgrp)
+{
+   if (drmcgrp)
+   css_put(&drmcgrp->css);
+}
+
+static inline struct drmcgrp *parent_drmcgrp(struct drmcgrp *cg)
+{
+   return css_drmcgrp(cg->css.parent);
+}
+
+#else /* CONFIG_CGROUP_DRM */
+
+struct drmcgrp {
+};
+
+static inline struct drmcgrp *css_drmcgrp(struct cgroup_subsys_state *css)
+{
+   return NULL;
+}
+
+static inline struct drmcgrp *drmcgrp_from(struct task_struct *task)
+{
+   return NULL;
+}
+
+static inline struct drmcgrp *get_drmcgrp(struct task_struct *task)
+{
+   return NULL;
+}
+
+static inline void put_drmcgrp(struct drmcgrp *drmcgrp)
+{
+}
+
+static inline struct drmcgrp *parent_drmcgrp(struct drmcgrp *cg)
+{
+   return NULL;
+}
+
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* _CGROUP_DRM_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index acb77dcff3b4..ddedad809e8b 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -61,6 +61,10 @@ SUBSYS(pids)
 SUBSYS(rdma)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_DRM)
+SUBSYS(drm)
+#endif
+
 /*
  * The following subsystems are not supported on the default hierarchy.
  */
diff --git a/init/Kconfig b/init/Kconfig
index d47cb77a220e..0b0f112eb23b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -839,6 +839,11 @@ config CGROUP_RDMA
  Attaching processes with active RDMA resources to the cgroup
  hierarchy is allowed even if can cross the hierarchy's limit.
 
+config CGROUP_DRM
+   bool "DRM controller (EXPERIMENTAL)"
+   help
+ Provides accounting and enforcement of resources in the DRM subsystem.
+
 config CGROUP_FREEZER
bool "Freezer controller"
help
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index bfcdae896122..6af14bd93050 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -4,5 +4,6 @@ obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
 obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
 obj-$(CONFIG_CGROUP_PIDS) += pids.o
 obj-$(CONFIG_CGROUP_RDMA) += rdma.o
+obj-$(CONFIG_CGROUP_DRM) += drm.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_DEBUG) += debug.o
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
new file mode 100644
index ..66cb1dda023d
--- /dev/null
+++ b/kernel/cgroup/drm.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+// Copyright 2019 Advanced Micro Devices, Inc.
+#include 
+#include 
+#include 
+
+static struct drmcgrp *root_drmcgrp __read_mostly;
+
+static void drmcgrp_css_free(struct cgroup_subsys_state *css)
+{
+   struct drmcgrp *drmcgrp = css_drmcgrp(css);
+
+   kfree(drmcgrp);
+}
+
+static struct cgroup_subsys_state *
+drmcgrp_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+   struct drmcgrp *parent = css_drmcgrp(parent_css);
+   struct drmcgrp *drmcgrp;
+
+   drmcgrp = kzalloc(sizeof(struct drmcgrp), GFP_KERNEL);
+   if (!drmcgrp)
+   return ERR_PTR(-ENOMEM);
+
+   if (!parent)
+   root_drmcgrp = drmcgrp;
+
+   return &drmcgrp->css;
+}
+
+struct cftype files[] = {
+   { } /* terminate */
+};
+
+struct cgroup_subsys drm_cgrp_subsys = {
+   .css_alloc  = drmcgrp_css_alloc,
+   .css_free   = drmcgrp_css_free,
+   .early_init = false,
+   .legacy_cftypes = files,
+   .dfl_cftypes= files,
+};
-- 
2.21.0


[RFC PATCH v3 05/11] drm, cgroup: Add peak GEM buffer allocation limit

2019-06-26 Thread Kenny Ho
drm.buffer.peak.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Largest GEM buffer allocated in bytes.

drm.buffer.peak.default
A read-only flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Default limits on the largest GEM buffer allocation in bytes.

drm.buffer.peak.max
A read-write flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Per device limits on the largest GEM buffer allocation in bytes.
This is a hard limit.  Attempts in allocating beyond the cgroup
limit will result in ENOMEM.  Shorthand understood by memparse
(such as k, m, g) can be used.

Set largest allocation for /dev/dri/card1 to 4MB
echo "226:1 4m" > drm.buffer.peak.max

Change-Id: I0830d56775568e1cf215b56cc892d5e7945e9f25
Signed-off-by: Kenny Ho 
---
 include/linux/cgroup_drm.h |  3 ++
 kernel/cgroup/drm.c| 61 ++
 2 files changed, 64 insertions(+)

diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index efa019666f1c..126c156ffd70 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -17,6 +17,9 @@ struct drmcgrp_device_resource {
/* for per device stats */
s64 bo_stats_total_allocated;
s64 bo_limits_total_allocated;
+
+   size_t  bo_stats_peak_allocated;
+   size_t  bo_limits_peak_allocated;
 };
 
 struct drmcgrp {
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index cfc1fe74dca3..265008197654 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -19,6 +19,7 @@ struct drmcgrp_device {
struct mutexmutex;
 
s64 bo_limits_total_allocated_default;
+   size_t  bo_limits_peak_allocated_default;
 };
 
 #define DRMCG_CTF_PRIV_SIZE 3
@@ -31,6 +32,7 @@ struct drmcgrp_device {
 
 enum drmcgrp_res_type {
DRMCGRP_TYPE_BO_TOTAL,
+   DRMCGRP_TYPE_BO_PEAK,
 };
 
 enum drmcgrp_file_type {
@@ -78,6 +80,9 @@ static inline int init_drmcgrp_single(struct drmcgrp 
*drmcgrp, int minor)
if (known_drmcgrp_devs[minor] != NULL) {
ddr->bo_limits_total_allocated =
  known_drmcgrp_devs[minor]->bo_limits_total_allocated_default;
+
+   ddr->bo_limits_peak_allocated =
+ known_drmcgrp_devs[minor]->bo_limits_peak_allocated_default;
}
 
return 0;
@@ -137,6 +142,9 @@ static inline void drmcgrp_print_stats(struct 
drmcgrp_device_resource *ddr,
case DRMCGRP_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", ddr->bo_stats_total_allocated);
break;
+   case DRMCGRP_TYPE_BO_PEAK:
+   seq_printf(sf, "%zu\n", ddr->bo_stats_peak_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -155,6 +163,9 @@ static inline void drmcgrp_print_limits(struct 
drmcgrp_device_resource *ddr,
case DRMCGRP_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", ddr->bo_limits_total_allocated);
break;
+   case DRMCGRP_TYPE_BO_PEAK:
+   seq_printf(sf, "%zu\n", ddr->bo_limits_peak_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -174,6 +185,10 @@ static inline void drmcgrp_print_default(struct 
drmcgrp_device *ddev,
seq_printf(sf, "%lld\n",
ddev->bo_limits_total_allocated_default);
break;
+   case DRMCGRP_TYPE_BO_PEAK:
+   seq_printf(sf, "%zu\n",
+   ddev->bo_limits_peak_allocated_default);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -315,6 +330,23 @@ ssize_t drmcgrp_bo_limit_write(struct kernfs_open_file 
*of, char *buf,
 
ddr->bo_limits_total_allocated = val;
break;
+   case DRMCGRP_TYPE_BO_PEAK:
+   p_max = parent == NULL ? SIZE_MAX :
+   parent->dev_resources[minor]->
+   bo_limits_peak_allocated;
+
+   rc = drmcgrp_process_limit_val(sattr, true,
+   ddev->bo_limits_peak_allocated_default,
+   p_max,
+   &val);
+
+   if (rc || val < 0) {
+   drmcgrp_pr_cft_err(drmcgrp, cft_name, minor);
+   continue;
+   }

[RFC PATCH v3 06/11] drm, cgroup: Add GEM buffer allocation count stats

2019-06-26 Thread Kenny Ho
drm.buffer.count.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Total number of GEM buffer allocated.

Change-Id: Id3e1809d5fee8562e47a7d2b961688956d844ec6
Signed-off-by: Kenny Ho 
---
 include/linux/cgroup_drm.h |  2 ++
 kernel/cgroup/drm.c| 23 ---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 126c156ffd70..e4400b21ab8e 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -20,6 +20,8 @@ struct drmcgrp_device_resource {
 
size_t  bo_stats_peak_allocated;
size_t  bo_limits_peak_allocated;
+
+   s64 bo_stats_count_allocated;
 };
 
 struct drmcgrp {
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 265008197654..9144f93b851f 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -33,6 +33,7 @@ struct drmcgrp_device {
 enum drmcgrp_res_type {
DRMCGRP_TYPE_BO_TOTAL,
DRMCGRP_TYPE_BO_PEAK,
+   DRMCGRP_TYPE_BO_COUNT,
 };
 
 enum drmcgrp_file_type {
@@ -145,6 +146,9 @@ static inline void drmcgrp_print_stats(struct 
drmcgrp_device_resource *ddr,
case DRMCGRP_TYPE_BO_PEAK:
seq_printf(sf, "%zu\n", ddr->bo_stats_peak_allocated);
break;
+   case DRMCGRP_TYPE_BO_COUNT:
+   seq_printf(sf, "%lld\n", ddr->bo_stats_count_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -396,6 +400,12 @@ struct cftype files[] = {
.private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_BO_PEAK,
DRMCGRP_FTYPE_LIMIT),
},
+   {
+   .name = "buffer.count.stats",
+   .seq_show = drmcgrp_bo_show,
+   .private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_BO_COUNT,
+   DRMCGRP_FTYPE_STATS),
+   },
{ } /* terminate */
 };
 
@@ -518,6 +528,8 @@ void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct 
drm_device *dev,
 
if (ddr->bo_stats_peak_allocated < (size_t)size)
ddr->bo_stats_peak_allocated = (size_t)size;
+
+   ddr->bo_stats_count_allocated++;
}
mutex_unlock(&known_drmcgrp_devs[devIdx]->mutex);
 }
@@ -526,15 +538,20 @@ EXPORT_SYMBOL(drmcgrp_chg_bo_alloc);
 void drmcgrp_unchg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
size_t size)
 {
+   struct drmcgrp_device_resource *ddr;
int devIdx = dev->primary->index;
 
if (drmcgrp == NULL || known_drmcgrp_devs[devIdx] == NULL)
return;
 
mutex_lock(&known_drmcgrp_devs[devIdx]->mutex);
-   for ( ; drmcgrp != NULL; drmcgrp = parent_drmcgrp(drmcgrp))
-   drmcgrp->dev_resources[devIdx]->bo_stats_total_allocated
-   -= (s64)size;
+   for ( ; drmcgrp != NULL; drmcgrp = parent_drmcgrp(drmcgrp)) {
+   ddr = drmcgrp->dev_resources[devIdx];
+
+   ddr->bo_stats_total_allocated -= (s64)size;
+
+   ddr->bo_stats_count_allocated--;
+   }
mutex_unlock(&known_drmcgrp_devs[devIdx]->mutex);
 }
 EXPORT_SYMBOL(drmcgrp_unchg_bo_alloc);
-- 
2.21.0


[RFC PATCH v3 00/11] new cgroup controller for gpu/drm subsystem

2019-06-26 Thread Kenny Ho
://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
[6] 
https://blog.openshift.com/gpu-accelerated-sql-queries-with-postgresql-pg-strom-in-openshift-3-10/
[7] https://github.com/RadeonOpenCompute/k8s-device-plugin
[8] https://github.com/kubernetes/kubernetes/issues/52757

Kenny Ho (11):
  cgroup: Introduce cgroup for drm subsystem
  cgroup: Add mechanism to register DRM devices
  drm/amdgpu: Register AMD devices for DRM cgroup
  drm, cgroup: Add total GEM buffer allocation limit
  drm, cgroup: Add peak GEM buffer allocation limit
  drm, cgroup: Add GEM buffer allocation count stats
  drm, cgroup: Add TTM buffer allocation stats
  drm, cgroup: Add TTM buffer peak usage stats
  drm, cgroup: Add per cgroup bw measure and control
  drm, cgroup: Add soft VRAM limit
  drm, cgroup: Allow more aggressive memory reclaim

 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|3 +-
 drivers/gpu/drm/drm_gem.c  |8 +
 drivers/gpu/drm/drm_prime.c|9 +
 drivers/gpu/drm/ttm/ttm_bo.c   |   91 ++
 drivers/gpu/drm/ttm/ttm_bo_util.c  |4 +
 include/drm/drm_cgroup.h   |  115 ++
 include/drm/drm_gem.h  |   11 +
 include/drm/ttm/ttm_bo_api.h   |2 +
 include/drm/ttm/ttm_bo_driver.h|   10 +
 include/linux/cgroup_drm.h |  114 ++
 include/linux/cgroup_subsys.h  |4 +
 init/Kconfig   |5 +
 kernel/cgroup/Makefile |1 +
 kernel/cgroup/drm.c| 1171 
 16 files changed, 1555 insertions(+), 1 deletion(-)
 create mode 100644 include/drm/drm_cgroup.h
 create mode 100644 include/linux/cgroup_drm.h
 create mode 100644 kernel/cgroup/drm.c

-- 
2.21.0


[RFC PATCH v3 09/11] drm, cgroup: Add per cgroup bw measure and control

2019-06-26 Thread Kenny Ho
The bandwidth is measured by keeping track of the amount of bytes moved
by ttm within a time period.  We defined two types of bandwidth: burst
and average.  Average bandwidth is calculated by dividing the total
amount of bytes moved within a cgroup by the lifetime of the cgroup.
Burst bandwidth is similar except that the byte and time measurement is
reset after a user configurable period.

The bandwidth control is best effort since it is done on a per move
basis instead of per byte.  The bandwidth is limited by delaying the
move of a buffer.  The bandwidth limit can be exceeded when the next
move is larger than the remaining allowance.
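
In other words, both figures come from the same counters and only the reset
policy differs.  A simplified sketch of the bookkeeping (field names follow
the nested keys documented below; this is illustrative, not the exact patch
code):

    struct bw_counters {
            s64 total_moved_byte;   /* bytes moved over the cgroup lifetime */
            s64 total_accum_us;     /* cgroup lifetime in us */
            s64 moved_byte;         /* bytes moved in the current period */
            s64 accum_us;           /* time accumulated in the current period */
    };

    /* Average bandwidth: lifetime bytes divided by lifetime. */
    static s64 avg_bytes_per_us(const struct bw_counters *c)
    {
            return c->total_accum_us ?
                    c->total_moved_byte / c->total_accum_us : 0;
    }

    /* Burst bandwidth: same ratio, but moved_byte and accum_us are reset
     * every burst_bw_period_in_us. */
    static s64 burst_bytes_per_us(const struct bw_counters *c)
    {
            return c->accum_us ? c->moved_byte / c->accum_us : 0;
    }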

drm.burst_bw_period_in_us
A read-write flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Length of a period use to measure burst bandwidth in us.
One period per device.

drm.burst_bw_period_in_us.default
A read-only flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Default length of a period in us (one per device.)

drm.bandwidth.stats
A read-only nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

  = ==
  burst_byte_per_us Burst bandwidth
  avg_bytes_per_us  Average bandwidth
  moved_byteAmount of byte moved within a period
  accum_us  Amount of time accumulated in a period
  total_moved_byte  Byte moved within the cgroup lifetime
  total_accum_usCgroup lifetime in us
  byte_credit   Available byte credit to limit avg bw
  = ==

Reading returns the following::
226:1 burst_byte_per_us=23 avg_bytes_per_us=0 moved_byte=2244608
accum_us=95575 total_moved_byte=45899776 total_accum_us=201634590
byte_credit=13214278590464
226:2 burst_byte_per_us=10 avg_bytes_per_us=219 moved_byte=430080
accum_us=39350 total_moved_byte=65518026752 total_accum_us=298337721
byte_credit=9223372036854644735

drm.bandwidth.high
A read-write nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

    ===
  bytes_in_period   Burst limit per period in byte
  avg_bytes_per_us  Average bandwidth limit in bytes per us
    ===

Reading returns the following::

226:1 bytes_in_period=9223372036854775807 avg_bytes_per_us=65536
226:2 bytes_in_period=9223372036854775807 avg_bytes_per_us=65536

drm.bandwidth.default
A read-only nested-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

    
  bytes_in_period   Default burst limit per period in byte
  avg_bytes_per_us  Default average bw limit in bytes per us
    

Reading returns the following::

226:1 bytes_in_period=9223372036854775807 avg_bytes_per_us=65536
226:2 bytes_in_period=9223372036854775807 avg_bytes_per_us=65536

Change-Id: Ie573491325ccc16535bb943e7857f43bd0962add
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/ttm/ttm_bo.c |   7 +
 include/drm/drm_cgroup.h |  13 ++
 include/linux/cgroup_drm.h   |  14 ++
 kernel/cgroup/drm.c  | 309 ++-
 4 files changed, 340 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e9f70547f0ad..f06c2b9d8a4a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1176,6 +1177,12 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 * Check whether we need to move buffer.
 */
if (!ttm_bo_mem_compat(placement, &bo->mem, &new_flags)) {
+   unsigned int move_delay = drmcgrp_get_mem_bw_period_in_us(bo);
+   move_delay /= 2000; /* check every half period in ms*/
+   while (bo->bdev->ddev != NULL && !drmcgrp_mem_can_move(bo)) {
+   msleep(move_delay);
+   }
+
ret = ttm_bo_move_buffer(bo, placement, ctx);
if (ret)
return ret;
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 48ab5450cf17..9b1dbd6a4eca 100644
--- a/include/drm/drm_cgroup.h

[RFC PATCH v3 03/11] drm/amdgpu: Register AMD devices for DRM cgroup

2019-06-26 Thread Kenny Ho
Change-Id: I3750fc657b956b52750a36cb303c54fa6a265b44
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index da7b4fe8ade3..2568fd730161 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -28,6 +28,7 @@
 #include 
 #include "amdgpu.h"
 #include 
+#include 
 #include "amdgpu_sched.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
@@ -97,6 +98,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
 
amdgpu_device_fini(adev);
 
+   drmcgrp_unregister_device(dev);
 done_free:
kfree(adev);
dev->dev_private = NULL;
@@ -141,6 +143,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned 
long flags)
struct amdgpu_device *adev;
int r, acpi_status;
 
+   drmcgrp_register_device(dev);
+
 #ifdef CONFIG_DRM_AMDGPU_SI
if (!amdgpu_si_support) {
switch (flags & AMD_ASIC_MASK) {
-- 
2.21.0


[RFC PATCH v3 02/11] cgroup: Add mechanism to register DRM devices

2019-06-26 Thread Kenny Ho
Change-Id: I908ee6975ea0585e4c30eafde4599f87094d8c65
Signed-off-by: Kenny Ho 
---
 include/drm/drm_cgroup.h   |  24 
 include/linux/cgroup_drm.h |  10 
 kernel/cgroup/drm.c| 116 +
 3 files changed, 150 insertions(+)
 create mode 100644 include/drm/drm_cgroup.h

diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
new file mode 100644
index ..ddb9eab64360
--- /dev/null
+++ b/include/drm/drm_cgroup.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+#ifndef __DRM_CGROUP_H__
+#define __DRM_CGROUP_H__
+
+#ifdef CONFIG_CGROUP_DRM
+
+int drmcgrp_register_device(struct drm_device *device);
+
+int drmcgrp_unregister_device(struct drm_device *device);
+
+#else
+static inline int drmcgrp_register_device(struct drm_device *device)
+{
+   return 0;
+}
+
+static inline int drmcgrp_unregister_device(struct drm_device *device)
+{
+   return 0;
+}
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* __DRM_CGROUP_H__ */
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 9928e60037a5..27497f786c93 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -6,10 +6,20 @@
 
 #ifdef CONFIG_CGROUP_DRM
 
+#include 
 #include 
+#include 
+
+/* limit defined per the way drm_minor_alloc operates */
+#define MAX_DRM_DEV (64 * DRM_MINOR_RENDER)
+
+struct drmcgrp_device_resource {
+   /* for per device stats */
+};
 
 struct drmcgrp {
struct cgroup_subsys_state  css;
+   struct drmcgrp_device_resource  *dev_resources[MAX_DRM_DEV];
 };
 
 static inline struct drmcgrp *css_drmcgrp(struct cgroup_subsys_state *css)
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 66cb1dda023d..7da6e0d93991 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -1,28 +1,99 @@
 // SPDX-License-Identifier: MIT
 // Copyright 2019 Advanced Micro Devices, Inc.
+#include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
+#include 
+#include 
+#include 
+
+static DEFINE_MUTEX(drmcgrp_mutex);
+
+struct drmcgrp_device {
+   struct drm_device   *dev;
+   struct mutexmutex;
+};
+
+/* indexed by drm_minor for access speed */
+static struct drmcgrp_device   *known_drmcgrp_devs[MAX_DRM_DEV];
+
+static int max_minor;
+
 
 static struct drmcgrp *root_drmcgrp __read_mostly;
 
 static void drmcgrp_css_free(struct cgroup_subsys_state *css)
 {
struct drmcgrp *drmcgrp = css_drmcgrp(css);
+   int i;
+
+   for (i = 0; i <= max_minor; i++) {
+   if (drmcgrp->dev_resources[i] != NULL)
+   kfree(drmcgrp->dev_resources[i]);
+   }
 
kfree(drmcgrp);
 }
 
+static inline int init_drmcgrp_single(struct drmcgrp *drmcgrp, int minor)
+{
+   struct drmcgrp_device_resource *ddr = drmcgrp->dev_resources[minor];
+
+   if (ddr == NULL) {
+   ddr = kzalloc(sizeof(struct drmcgrp_device_resource),
+   GFP_KERNEL);
+
+   if (!ddr)
+   return -ENOMEM;
+
+   drmcgrp->dev_resources[minor] = ddr;
+   }
+
+   /* set defaults here */
+
+   return 0;
+}
+
+static inline int init_drmcgrp(struct drmcgrp *drmcgrp, struct drm_device *dev)
+{
+   int rc = 0;
+   int i;
+
+   if (dev != NULL) {
+   rc = init_drmcgrp_single(drmcgrp, dev->primary->index);
+   return rc;
+   }
+
+   for (i = 0; i <= max_minor; i++) {
+   rc = init_drmcgrp_single(drmcgrp, i);
+   if (rc)
+   return rc;
+   }
+
+   return 0;
+}
+
 static struct cgroup_subsys_state *
 drmcgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 {
struct drmcgrp *parent = css_drmcgrp(parent_css);
struct drmcgrp *drmcgrp;
+   int rc;
 
drmcgrp = kzalloc(sizeof(struct drmcgrp), GFP_KERNEL);
if (!drmcgrp)
return ERR_PTR(-ENOMEM);
 
+   rc = init_drmcgrp(drmcgrp, NULL);
+   if (rc) {
+   drmcgrp_css_free(&drmcgrp->css);
+   return ERR_PTR(rc);
+   }
+
if (!parent)
root_drmcgrp = drmcgrp;
 
@@ -40,3 +111,48 @@ struct cgroup_subsys drm_cgrp_subsys = {
.legacy_cftypes = files,
.dfl_cftypes= files,
 };
+
+int drmcgrp_register_device(struct drm_device *dev)
+{
+   struct drmcgrp_device *ddev;
+
+   ddev = kzalloc(sizeof(struct drmcgrp_device), GFP_KERNEL);
+   if (!ddev)
+   return -ENOMEM;
+
+   ddev->dev = dev;
+   mutex_init(&ddev->mutex);
+
+   mutex_lock(&drmcgrp_mutex);
+   known_drmcgrp_devs[dev->primary->index] = ddev;
+   max_minor = max(max_minor, dev->primary->index);
+   mutex_unlock(&drmcgrp_mutex);
+
+   /* init cgroups created before registration (i.e. root cgroup) */
+   if (root_drmcgrp != NULL) {
+   struct cgroup_subsys_stat

[RFC PATCH v3 08/11] drm, cgroup: Add TTM buffer peak usage stats

2019-06-26 Thread Kenny Ho
drm.memory.peak.stats
A read-only nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

  == ==
  system Peak host memory used
  tt Peak host memory used by the device (GTT/GART)
  vram   Peak Video RAM used by the drm device
  priv   Other drm device specific memory peak usage
  == ==

Reading returns the following::

226:0 system=0 tt=0 vram=0 priv=0
226:1 system=0 tt=9035776 vram=17768448 priv=16809984
226:2 system=0 tt=9035776 vram=17768448 priv=16809984

Change-Id: I986e44533848f66411465bdd52105e78105a709a
Signed-off-by: Kenny Ho 
---
 include/linux/cgroup_drm.h |  1 +
 kernel/cgroup/drm.c| 20 
 2 files changed, 21 insertions(+)

diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 141bea06f74c..922529641df5 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -25,6 +25,7 @@ struct drmcgrp_device_resource {
s64 bo_stats_count_allocated;
 
s64 mem_stats[TTM_PL_PRIV+1];
+   s64 mem_peaks[TTM_PL_PRIV+1];
s64 mem_stats_evict;
 };
 
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 5aee42a628c1..5f5fa6a2b068 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -38,6 +38,7 @@ enum drmcgrp_res_type {
DRMCGRP_TYPE_BO_COUNT,
DRMCGRP_TYPE_MEM,
DRMCGRP_TYPE_MEM_EVICT,
+   DRMCGRP_TYPE_MEM_PEAK,
 };
 
 enum drmcgrp_file_type {
@@ -171,6 +172,13 @@ static inline void drmcgrp_print_stats(struct 
drmcgrp_device_resource *ddr,
case DRMCGRP_TYPE_MEM_EVICT:
seq_printf(sf, "%lld\n", ddr->mem_stats_evict);
break;
+   case DRMCGRP_TYPE_MEM_PEAK:
+   for (i = 0; i <= TTM_PL_PRIV; i++) {
+   seq_printf(sf, "%s=%lld ", ttm_placement_names[i],
+   ddr->mem_peaks[i]);
+   }
+   seq_puts(sf, "\n");
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -440,6 +448,12 @@ struct cftype files[] = {
.private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_MEM_EVICT,
DRMCGRP_FTYPE_STATS),
},
+   {
+   .name = "memory.peaks.stats",
+   .seq_show = drmcgrp_bo_show,
+   .private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_MEM_PEAK,
+   DRMCGRP_FTYPE_STATS),
+   },
{ } /* terminate */
 };
 
@@ -608,6 +622,8 @@ void drmcgrp_chg_mem(struct ttm_buffer_object *tbo)
for ( ; drmcgrp != NULL; drmcgrp = parent_drmcgrp(drmcgrp)) {
ddr = drmcgrp->dev_resources[devIdx];
ddr->mem_stats[mem_type] += size;
+   ddr->mem_peaks[mem_type] = max(ddr->mem_peaks[mem_type],
+   ddr->mem_stats[mem_type]);
}
mutex_unlock(&known_drmcgrp_devs[devIdx]->mutex);
 }
@@ -662,6 +678,10 @@ void drmcgrp_mem_track_move(struct ttm_buffer_object 
*old_bo, bool evict,
ddr->mem_stats[old_mem_type] -= move_in_bytes;
ddr->mem_stats[new_mem_type] += move_in_bytes;
 
+   ddr->mem_peaks[new_mem_type] = max(
+   ddr->mem_peaks[new_mem_type],
+   ddr->mem_stats[new_mem_type]);
+
if (evict)
ddr->mem_stats_evict++;
}
-- 
2.21.0


[RFC PATCH v3 07/11] drm, cgroup: Add TTM buffer allocation stats

2019-06-26 Thread Kenny Ho
The drm resource being measured is the TTM (Translation Table Manager)
buffers.  TTM manages different types of memory that a GPU might access.
These memory types include dedicated Video RAM (VRAM) and host/system
memory accessible through IOMMU (GART/GTT).  TTM is currently used by
multiple drm drivers (amd, ast, bochs, cirrus, hisilicon, mgag200,
nouveau, qxl, virtio, vmwgfx.)
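
For reference, the nested keys documented below correspond to TTM's standard
placements; a sketch of the assumed mapping (the TTM_PL_* constants come from
include/drm/ttm/ttm_placement.h):

    /* Sketch: key names used in drm.memory.stats, indexed by TTM placement. */
    static const char *const placement_names[TTM_PL_PRIV + 1] = {
            [TTM_PL_SYSTEM] = "system", /* host/system memory */
            [TTM_PL_TT]     = "tt",     /* GTT/GART, device-accessible host memory */
            [TTM_PL_VRAM]   = "vram",   /* dedicated video RAM */
            [TTM_PL_PRIV]   = "priv",   /* driver/vendor specific */
    };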

drm.memory.stats
A read-only nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

  == =
  system Host/system memory
  tt Host memory used by the drm device (GTT/GART)
  vram   Video RAM used by the drm device
  priv   Other drm device, vendor specific memory
  == =

Reading returns the following::

226:0 system=0 tt=0 vram=0 priv=0
226:1 system=0 tt=9035776 vram=17768448 priv=16809984
226:2 system=0 tt=9035776 vram=17768448 priv=16809984

drm.memory.evict.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Total number of evictions.

Change-Id: Ice2c4cc845051229549bebeb6aa2d7d6153bdf6a
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |   3 +-
 drivers/gpu/drm/ttm/ttm_bo.c|  30 +++
 drivers/gpu/drm/ttm/ttm_bo_util.c   |   4 +
 include/drm/drm_cgroup.h|  19 
 include/drm/ttm/ttm_bo_api.h|   2 +
 include/drm/ttm/ttm_bo_driver.h |   8 ++
 include/linux/cgroup_drm.h  |   4 +
 kernel/cgroup/drm.c | 113 
 8 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e9ecc3953673..a8dfc78ed45f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1678,8 +1678,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
mutex_init(&adev->mman.gtt_window_lock);
 
/* No others user of address space so set it to 0 */
-   r = ttm_bo_device_init(&adev->mman.bdev,
+   r = ttm_bo_device_init_tmp(&adev->mman.bdev,
   &amdgpu_bo_driver,
+  adev->ddev,
   adev->ddev->anon_inode->i_mapping,
   adev->need_dma32);
if (r) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2845fceb2fbd..e9f70547f0ad 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -42,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static void ttm_bo_global_kobj_release(struct kobject *kobj);
 
@@ -151,6 +153,10 @@ static void ttm_bo_release_list(struct kref *list_kref)
struct ttm_bo_device *bdev = bo->bdev;
size_t acc_size = bo->acc_size;
 
+   if (bo->bdev->ddev != NULL) // TODO: remove after ddev initialized for all
+   drmcgrp_unchg_mem(bo);
+   put_drmcgrp(bo->drmcgrp);
+
BUG_ON(kref_read(&bo->list_kref));
BUG_ON(kref_read(&bo->kref));
BUG_ON(atomic_read(&bo->cpu_writers));
@@ -353,6 +359,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
if (bo->mem.mem_type == TTM_PL_SYSTEM) {
if (bdev->driver->move_notify)
bdev->driver->move_notify(bo, evict, mem);
+   if (bo->bdev->ddev != NULL) // TODO: remove after ddev initialized for all
+   drmcgrp_mem_track_move(bo, evict, mem);
bo->mem = *mem;
mem->mm_node = NULL;
goto moved;
@@ -361,6 +369,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
 
if (bdev->driver->move_notify)
bdev->driver->move_notify(bo, evict, mem);
+   if (bo->bdev->ddev != NULL) // TODO: remove after ddev initialized for all
+   drmcgrp_mem_track_move(bo, evict, mem);
 
if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
@@ -374,6 +384,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
if (bdev->driver->move_notify) {
swap(*mem, bo->mem);
bdev->driver->move_notify(bo, false, mem);
+   if (bo->bdev->ddev != NULL) // TODO: remove after ddev initialized for all
+  

[RFC PATCH v3 10/11] drm, cgroup: Add soft VRAM limit

2019-06-26 Thread Kenny Ho
The drm resource being limited is the TTM (Translation Table Manager)
buffers.  TTM manages different types of memory that a GPU might access.
These memory types include dedicated Video RAM (VRAM) and host/system
memory accessible through IOMMU (GART/GTT).  TTM is currently used by
multiple drm drivers (amd, ast, bochs, cirrus, hisilicon, mgag200,
nouveau, qxl, virtio, vmwgfx.)

TTM buffers belonging to drm cgroups under memory pressure will be
selected to be evicted first.
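
Roughly speaking, the selection flags cgroups whose usage exceeds their soft
limit and lets the eviction loop prefer their buffers; a simplified sketch of
that predicate (the patch spreads this logic across
drmcgrp_mem_pressure_scan() and drmcgrp_mem_should_evict()):

    /* Sketch: a cgroup is under "memory pressure" for a given TTM memory
     * type when its usage is above the configured soft limit. */
    static bool over_soft_limit(const struct drmcgrp_device_resource *ddr,
                                unsigned int mem_type)
    {
            return ddr->mem_stats[mem_type] > ddr->mem_highs[mem_type];
    }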

drm.memory.high
A read-write nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

   =
  vram Video RAM soft limit for a drm device in byte
   =

Reading returns the following::

226:0 vram=0
226:1 vram=17768448
226:2 vram=17768448

drm.memory.default
A read-only nested-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

  ====  ===============================
  vram  Video RAM default limit in byte
  ====  ===============================

Reading returns the following::

226:0 vram=0
226:1 vram=17768448
226:2 vram=17768448

Change-Id: I7988e28a453b53140b40a28c176239acbc81d491
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/ttm/ttm_bo.c |   7 ++
 include/drm/drm_cgroup.h |  15 
 include/linux/cgroup_drm.h   |   2 +
 kernel/cgroup/drm.c  | 145 +++
 4 files changed, 169 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f06c2b9d8a4a..79c530f4a198 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -806,12 +806,19 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
struct ttm_mem_type_manager *man = &bdev->man[mem_type];
struct ttm_buffer_object *bo = NULL;
bool locked = false;
+   bool check_drmcgrp;
unsigned i;
int ret;
 
+   check_drmcgrp = drmcgrp_mem_pressure_scan(bdev, mem_type);
+
spin_lock(&glob->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &man->lru[i], lru) {
+   if (check_drmcgrp &&
+   !drmcgrp_mem_should_evict(bo, mem_type))
+   continue;
+
if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
continue;
 
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 9b1dbd6a4eca..360c1e6c809f 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -6,6 +6,7 @@
 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_CGROUP_DRM
 
@@ -25,6 +26,8 @@ void drmcgrp_mem_track_move(struct ttm_buffer_object *old_bo, 
bool evict,
struct ttm_mem_reg *new_mem);
 unsigned int drmcgrp_get_mem_bw_period_in_us(struct ttm_buffer_object *tbo);
 bool drmcgrp_mem_can_move(struct ttm_buffer_object *tbo);
+bool drmcgrp_mem_pressure_scan(struct ttm_bo_device *bdev, unsigned type);
+bool drmcgrp_mem_should_evict(struct ttm_buffer_object *tbo, unsigned type);
 
 #else
 static inline int drmcgrp_register_device(struct drm_device *device)
@@ -82,5 +85,17 @@ static inline bool drmcgrp_mem_can_move(struct 
ttm_buffer_object *tbo)
 {
return true;
 }
+
+static inline bool drmcgrp_mem_pressure_scan(struct ttm_bo_device *bdev,
+   unsigned type)
+{
+   return false;
+}
+
+static inline bool drmcgrp_mem_should_evict(struct ttm_buffer_object *tbo,
+   unsigned type)
+{
+   return true;
+}
 #endif /* CONFIG_CGROUP_DRM */
 #endif /* __DRM_CGROUP_H__ */
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 94828da2104a..52ef02eaac70 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -35,6 +35,8 @@ struct drmcgrp_device_resource {
 
s64 mem_stats[TTM_PL_PRIV+1];
s64 mem_peaks[TTM_PL_PRIV+1];
+   s64 mem_highs[TTM_PL_PRIV+1];
+   boolmem_pressure[TTM_PL_PRIV+1];
s64 mem_stats_evict;
 
s64 mem_bw_stats_last_update_us;
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index bbc6612200a4..1ce13db36ce9 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -29,6 +29,8 @@ struct drmcgrp_device {
 
s64 mem_bw_bytes_in_period_default;
s64 mem_bw_avg_bytes_per_us_default;
+
+   s64 mem_highs_default[TTM_PL_PRIV+1];
 };
 
 #define DRMCG_CTF_PRIV_SIZE 3
@@ -114,6 +116,8 

[RFC PATCH v3 11/11] drm, cgroup: Allow more aggressive memory reclaim

2019-06-26 Thread Kenny Ho
Allow DRM TTM memory manager to register a work_struct, such that, when
a drmcgrp is under memory pressure, memory reclaiming can be triggered
immediately.

Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/ttm/ttm_bo.c| 47 +
 include/drm/drm_cgroup.h| 14 ++
 include/drm/ttm/ttm_bo_driver.h |  2 ++
 kernel/cgroup/drm.c | 33 +++
 4 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 79c530f4a198..5fc3bc5bd4c5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1509,6 +1509,44 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned 
mem_type)
 }
 EXPORT_SYMBOL(ttm_bo_evict_mm);
 
+static void ttm_bo_reclaim_wq(struct work_struct *work)
+{
+   struct ttm_operation_ctx ctx = {
+   .interruptible = false,
+   .no_wait_gpu = false,
+   .flags = TTM_OPT_FLAG_FORCE_ALLOC
+   };
+   struct ttm_mem_type_manager *man =
+   container_of(work, struct ttm_mem_type_manager, reclaim_wq);
+   struct ttm_bo_device *bdev = man->bdev;
+   struct dma_fence *fence;
+   int mem_type;
+   int ret;
+
+   for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
+   if (&bdev->man[mem_type] == man)
+   break;
+
+   BUG_ON(mem_type >= TTM_NUM_MEM_TYPES);
+
+   if (!drmcgrp_mem_pressure_scan(bdev, mem_type))
+   return;
+
+   ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
+   if (ret)
+   return;
+
+   spin_lock(&man->move_lock);
+   fence = dma_fence_get(man->move);
+   spin_unlock(&man->move_lock);
+
+   if (fence) {
+   ret = dma_fence_wait(fence, false);
+   dma_fence_put(fence);
+   }
+
+}
+
 int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
unsigned long p_size)
 {
@@ -1543,6 +1581,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned 
type,
INIT_LIST_HEAD(&man->lru[i]);
man->move = NULL;
 
+   pr_err("drmcgrp %p type %d\n", bdev->ddev, type);
+
+   if (type <= TTM_PL_VRAM) {
+   INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
+   drmcgrp_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
+   }
+
return 0;
 }
 EXPORT_SYMBOL(ttm_bo_init_mm);
@@ -1620,6 +1665,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
man = &bdev->man[i];
if (man->has_type) {
man->use_type = false;
+   drmcgrp_unregister_device_mm(bdev->ddev, i);
+   cancel_work_sync(&man->reclaim_wq);
if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
ret = -EBUSY;
pr_err("DRM memory manager type %d is not 
clean\n",
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 360c1e6c809f..134d6e5475f3 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -5,6 +5,7 @@
 #define __DRM_CGROUP_H__
 
 #include 
+#include 
 #include 
 #include 
 
@@ -12,6 +13,9 @@
 
 int drmcgrp_register_device(struct drm_device *device);
 int drmcgrp_unregister_device(struct drm_device *device);
+void drmcgrp_register_device_mm(struct drm_device *dev, unsigned type,
+   struct work_struct *wq);
+void drmcgrp_unregister_device_mm(struct drm_device *dev, unsigned type);
 bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
struct drmcgrp *relative);
 void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
@@ -40,6 +44,16 @@ static inline int drmcgrp_unregister_device(struct 
drm_device *device)
return 0;
 }
 
+static inline void drmcgrp_register_device_mm(struct drm_device *dev,
+   unsigned type, struct work_struct *wq)
+{
+}
+
+static inline void drmcgrp_unregister_device_mm(struct drm_device *dev,
+   unsigned type)
+{
+}
+
 static inline bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
struct drmcgrp *relative)
 {
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 4cbcb41e5aa9..0956ca7888fc 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -205,6 +205,8 @@ struct ttm_mem_type_manager {
 * Protected by @move_lock.
 */
struct dma_fence *move;
+
+   struct work_struct reclaim_wq;
 };
 
 /**
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 1ce13db36ce9..985a89e849d3 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -31,6 +31,8 @@ struct drmcgrp_device {
s64 mem_bw_avg_bytes_per_us_default;
 
s64 mem_highs_default[TTM_PL_PR

Re: [RFC PATCH v3 01/11] cgroup: Introduce cgroup for drm subsystem

2019-06-26 Thread Kenny Ho
On Wed, Jun 26, 2019 at 11:49 AM Daniel Vetter  wrote:
>
> Bunch of naming bikesheds

I appreciate the suggestions, naming is hard :).

> > +#include 
> > +
> > +struct drmcgrp {
>
> drm_cgroup for more consistency how we usually call these things.

I was hoping to keep the symbol short if possible.  I started with
drmcg (following blkcg), but I believe that causes confusion with
other aspects of the drm subsystem.  I don't have too strong of an
opinion on this but I'd prefer not to have to keep refactoring.  So if
there are other opinions on this, please speak up.

> > +
> > +static inline void put_drmcgrp(struct drmcgrp *drmcgrp)
>
> In drm we generally put _get/_put at the end, cgroup seems to do the same.

ok, I will refactor.

> > +{
> > + if (drmcgrp)
> > + css_put(&drmcgrp->css);
> > +}
> > +
> > +static inline struct drmcgrp *parent_drmcgrp(struct drmcgrp *cg)
>
> I'd also call this drm_cgroup_parent or so.
>
> Also all the above needs a bit of nice kerneldoc for the final version.
> -Daniel

Noted, will do, thanks.

Regards,
Kenny

Re: [RFC PATCH v3 04/11] drm, cgroup: Add total GEM buffer allocation limit

2019-06-26 Thread Kenny Ho
On Wed, Jun 26, 2019 at 12:05 PM Daniel Vetter  wrote:
>
> > drm.buffer.default
> > A read-only flat-keyed file which exists on the root cgroup.
> > Each entry is keyed by the drm device's major:minor.
> >
> > Default limits on the total GEM buffer allocation in bytes.
>
> Don't we need a "0 means no limit" semantics here?

I believe the convention is to use the 'max' keyword.

>
> I think we need a new drm-cgroup.rst which contains all this
> documentation.

Yes I planned to do that when things are more finalized.  I am
actually writing the commit message following the current doc format
so I can reuse it in the rst.

>
> With multiple GPUs, do we need an overall GEM bo limit, across all gpus?
> For other stuff later on like vram/tt/... and all that it needs to be
> per-device, but I think one overall limit could be useful.

This one I am not sure about, but it should be fairly straightforward to
add.  I'd love to hear more feedback on this as well.

> >   if (!amdgpu_bo_validate_size(adev, size, bp->domain))
> >   return -ENOMEM;
> >
> > + if (!drmcgrp_bo_can_allocate(current, adev->ddev, size))
> > + return -ENOMEM;
>
> So what happens when you start a lot of threads all at the same time,
> allocating gem bo? Also would be nice if we could roll out at least the
> accounting part of this cgroup to all GEM drivers.

When there is a large number of allocations, the allocations will be
checked in sequence within a device (since I used a per device mutex
in the check.)  Are you suggesting the overhead here is significant
enough to be a bottleneck?  The accounting part should be available to
all GEM drivers (unless I missed something) since the chg and unchg
functions are called via the generic drm_gem_private_object_init and
drm_gem_object_release.
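
For readers following along, a rough sketch of where those hooks sit
(illustrative only, not the literal patch; get_drmcgrp() and the unchg
helper name are assumptions made for this sketch):

/* Illustrative sketch only -- not the literal patch. */
void drm_gem_private_object_init(struct drm_device *dev,
				 struct drm_gem_object *obj, size_t size)
{
	/* ... existing initialization ... */

	obj->drmcgrp = get_drmcgrp(current);		/* assumed helper */
	drmcgrp_chg_bo_alloc(obj->drmcgrp, dev, size);	/* account to the cgroup */
}

void drm_gem_object_release(struct drm_gem_object *obj)
{
	drmcgrp_unchg_bo_alloc(obj->drmcgrp, obj->dev, obj->size);  /* assumed name */

	/* ... existing teardown ... */
}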

> > + /* only allow bo from the same cgroup or its ancestor to be imported 
> > */
> > + if (drmcgrp != NULL &&
>
> Quite a serious limitation here ...
>
> > + !drmcgrp_is_self_or_ancestor(drmcgrp, obj->drmcgrp)) {
>
> Also what happens if you actually share across devices? Then importing in
> the 2nd group is suddenly possible, and I think will be double-counted.
>
> What's the underlying technical reason for not allowing sharing across
> cgroups?

With the current implementation, there shouldn't be double counting as
the counting is done during the buffer init.

To be clear, sharing across cgroups is allowed; the buffer just needs
to be allocated by a process whose cgroup is the same as, or an
ancestor of, the importing cgroup.  So in the case of xorg allocating
buffers for clients, xorg would be in the root cgroup and the buffer
can be passed around by different clients (in the root or other
cgroups.)  The idea here is to establish some form of ownership,
otherwise there wouldn't be a way to account for or limit the usage.
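
To make the ownership check concrete, a minimal sketch of what
drmcgrp_is_self_or_ancestor() could look like (illustrative, not the
exact patch; it only relies on parent_drmcgrp() from the header):

/* Illustrative sketch: true if @relative is @self or an ancestor of @self. */
static bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
					struct drmcgrp *relative)
{
	while (self != NULL) {
		if (self == relative)
			return true;
		self = parent_drmcgrp(self);
	}

	return false;
}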

Regards,
Kenny

Re: [RFC PATCH v3 02/11] cgroup: Add mechanism to register DRM devices

2019-06-26 Thread Kenny Ho
(sending again, I keep missing the reply-all in gmail.)

On Wed, Jun 26, 2019 at 11:56 AM Daniel Vetter  wrote:
>
> Why the separate, explicit registration step? I think a simpler design for
> drivers would be that we set up cgroups if there's anything to be
> controlled, and then for GEM drivers the basic GEM stuff would be set up
> automatically (there's really no reason not to I think).

Is this what you mean with the comment about drm_dev_register below?
I think I understand what you are saying but not super clear.  Are you
suggesting the use of driver feature bits (drm_core_check_feature,
etc.) similar to the way Brian Welty did in his proposal in May?

> Also tying to the minor is a bit funky, since we have multiple of these.
> Need to make sure were at least consistent with whether we use the primary
> or render minor - I'd always go with the primary one like you do here.

Um... come to think of it, I can probably embed struct drmcgrp_device
into drm_device and that way I don't really need to keep a separate
array of
known_drmcgrp_devs and get rid of that max_minor thing.  Not sure why
I didn't think of this before.

> > +
> > +int drmcgrp_register_device(struct drm_device *dev)
>
> Imo this should be done as part of drm_dev_register (maybe only if the
> driver has set up a controller or something). Definitely with the
> unregister logic below. Also anything used by drivers needs kerneldoc.
>
>
> > + /* init cgroups created before registration (i.e. root cgroup) */
> > + if (root_drmcgrp != NULL) {
> > + struct cgroup_subsys_state *pos;
> > + struct drmcgrp *child;
> > +
> > + rcu_read_lock();
> > + css_for_each_descendant_pre(pos, _drmcgrp->css) {
> > + child = css_drmcgrp(pos);
> > + init_drmcgrp(child, dev);
> > + }
> > + rcu_read_unlock();
>
> I have no idea, but is this guaranteed to get them all?

I believe so, based on my understanding of
css_for_each_descendant_pre and how I am starting from the root
cgroup.  Hopefully I didn't miss anything.

Regards,
Kenny

Re: [RFC PATCH v3 02/11] cgroup: Add mechanism to register DRM devices

2019-06-26 Thread Kenny Ho
On Wed, Jun 26, 2019 at 5:04 PM Daniel Vetter  wrote:
> On Wed, Jun 26, 2019 at 10:37 PM Kenny Ho  wrote:
> > (sending again, I keep missing the reply-all in gmail.)
> You can make it the default somewhere in the gmail options.
Um... interesting, my option was actually not set (neither reply nor reply-all.)

> > On Wed, Jun 26, 2019 at 11:56 AM Daniel Vetter  wrote:
> > >
> > > Why the separate, explicit registration step? I think a simpler design for
> > > drivers would be that we set up cgroups if there's anything to be
> > > controlled, and then for GEM drivers the basic GEM stuff would be set up
> > > automatically (there's really no reason not to I think).
> >
> > Is this what you mean with the comment about drm_dev_register below?
> > I think I understand what you are saying but not super clear.  Are you
> > suggesting the use of driver feature bits (drm_core_check_feature,
> > etc.) similar to the way Brian Welty did in his proposal in May?
>
> Also not exactly a fan of driver feature bits tbh. What I had in mind was:
>
> - For stuff like the GEM accounting which we can do for all drivers
> easily (we can't do the enforcment, that needs a few changes), just
> roll it out for everyone. I.e. if you enable the DRMCG Kconfig, all
> DRIVER_GEM would get that basic gem cgroup accounting.
>
> - for other bits the driver just registers certain things, like "I can
> enforce gem limits" or "I have gpu memory regions vram, tt, and system
> and can enforce them" in their normal driver setup. Then at
> drm_dev_register time we register all these additional cgroups, like
> we today register all the other interafaces and pieces of a drm_device
> (drm_minor, drm_connectors, debugfs files, sysfs stuff, all these
> things).
>
> Since the concepts are still a bit in flux, let's take an example from
> the modeset side:
> - driver call drm_connector_init() to create connector object
> - drm_dev_register() also sets up all the public interfaces for that
> connector (debugfs, sysfs, ...)
>
> I think a similar setup would be good for cgroups here, you just
> register your special ttm_mem_reg or whatever, and the magic happens
> automatically.

Ok, I will look into those (I am not too familiar with those at this point.)
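
If it helps frame the discussion, here is a very rough sketch of what I
think that could look like (drmcg_register_dev() is a hypothetical
helper, not something in the current series):

/* Hypothetical sketch: cgroup setup hooked into device registration. */
static int drm_dev_register_drmcg(struct drm_device *dev)
{
	/* every GEM driver gets the basic buffer accounting automatically */
	if (drm_core_check_feature(dev, DRIVER_GEM))
		return drmcg_register_dev(dev);		/* hypothetical helper */

	return 0;
}

drm_dev_register() would call this next to the existing minor/debugfs/
sysfs setup, drm_dev_unregister() would do the corresponding teardown,
and drivers with extra resource types (vram, tt, ...) would register
those before calling drm_dev_register().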

> > > I have no idea, but is this guaranteed to get them all?
> >
> > I believe so, based on my understanding of
> > css_for_each_descendant_pre and how I am starting from the root
> > cgroup.  Hopefully I didn't miss anything.
>
> Well it's rcu, so I expect it'll race with concurrent
> addition/removal. And the kerneldoc has some complicated sounding
> comments about how to synchronize that with some locks that I don't
> fully understand, but I think you're also not having any additional
> locking so not sure this all works correctly ...
>
> Do we still need the init_dmcgrp stuff if we'd just embedd? That would
> probably be the simplest way to solve this all :-)

I will need to dig into it a bit more to know for sure.  I think I
still need the init_drmcgrp stuff. I implemented it like this because
the cgroup subsystem appears to be initialized before the drm subsystem,
so the root cgroup does not know about any drm devices and the per device
default limits are not set.  In theory, I should only need to set the
root cgroup (so I don't need to use css_for_each_descendant_pre, which
requires the rcu_lock.)  But I am not 100% confident there won't be
any additional cgroups being added to the hierarchy between cgroup
subsystem init and drm subsystem init.

Alternatively I can protect it with an additional mutex but I am not
sure if that's needed.

Regards,
Kenny

Re: [RFC PATCH v3 00/11] new cgroup controller for gpu/drm subsystem

2019-06-29 Thread Kenny Ho
On Thu, Jun 27, 2019 at 3:24 AM Daniel Vetter  wrote:
> Another question I have: What about HMM? With the device memory zone
> the core mm will be a lot more involved in managing that, but I also
> expect that we'll have classic buffer-based management for a long time
> still. So these need to work together, and I fear slightly that we'll
> have memcg and drmcg fighting over the same pieces a bit perhaps?
>
> Adding Jerome, maybe he has some thoughts on this.

I just did a bit of digging and this looks like the current behaviour:
https://www.kernel.org/doc/html/v5.1/vm/hmm.html#memory-cgroup-memcg-and-rss-accounting

"For now device memory is accounted as any regular page in rss
counters (either anonymous if device page is used for anonymous, file
if device page is used for file backed page or shmem if device page is
used for shared memory). This is a deliberate choice to keep existing
applications, that might start using device memory without knowing
about it, running unimpacted.

A drawback is that the OOM killer might kill an application using a
lot of device memory and not a lot of regular system memory and thus
not freeing much system memory. We want to gather more real world
experience on how applications and system react under memory pressure
in the presence of device memory before deciding to account device
memory differently."

Regards,
Kenny

Re: [RFC PATCH v3 07/11] drm, cgroup: Add TTM buffer allocation stats

2019-06-26 Thread Kenny Ho
On Wed, Jun 26, 2019 at 12:12 PM Daniel Vetter  wrote:
>
> On Wed, Jun 26, 2019 at 11:05:18AM -0400, Kenny Ho wrote:
> > drm.memory.stats
> > A read-only nested-keyed file which exists on all cgroups.
> > Each entry is keyed by the drm device's major:minor.  The
> > following nested keys are defined.
> >
> >   == =
> >   system Host/system memory
>
> Shouldn't that be covered by gem bo stats already? Also, system memory is
> definitely something a lot of non-ttm drivers want to be able to track, so
> that needs to be separate from ttm.
The gem bo stats cover all of these types.  I treat the gem stats
as more of the front end and a hard limit, and this set of stats as the
backing store, which can be of various types.  How do non-ttm drivers
identify various memory types?

> >   tt Host memory used by the drm device (GTT/GART)
> >   vram   Video RAM used by the drm device
> >   priv   Other drm device, vendor specific memory
>
> So what's "priv". In general I think we need some way to register the
> different kinds of memory, e.g. stuff not in your list:
>
> - multiple kinds of vram (like numa-style gpus)
> - cma (for all those non-ttm drivers that's a big one, it's like system
>   memory but also totally different)
> - any carveouts and stuff
privs are vendor specific, which is why I have truncated it.  For
example, AMD has AMDGPU_PL_GDS, GWS, OA
https://elixir.bootlin.com/linux/v5.2-rc6/source/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h#L30

Since we are using a keyed file type, we should be able to support
vendor specific memory types but I am not sure if this is acceptable to
cgroup upstream.  This is why I stick to the 3 memory types that are
common across all ttm drivers.

> I think with all the ttm refactoring going on I think we need to de-ttm
> the interface functions here a bit. With Gerd Hoffmans series you can just
> use a gem_bo pointer here, so what's left to do is have some extracted
> structure for tracking memory types. I think Brian Welty has some ideas
> for this, even in patch form. Would be good to keep him on cc at least for
> the next version. We'd need to explicitly hand in the ttm_mem_reg (or
> whatever the specific thing is going to be).

I assume Gerd Hoffman's series you are referring to is this one?
https://www.spinics.net/lists/dri-devel/msg215056.html

I can certainly keep an eye out for Gerd's refactoring while
refactoring other parts of this RFC.

I have added Brian and Gerd to the thread for awareness.

Regards,
Kenny

Re: [RFC PATCH v3 09/11] drm, cgroup: Add per cgroup bw measure and control

2019-06-26 Thread Kenny Ho
On Wed, Jun 26, 2019 at 12:25 PM Daniel Vetter  wrote:
>
> On Wed, Jun 26, 2019 at 11:05:20AM -0400, Kenny Ho wrote:
> > The bandwidth is measured by keeping track of the amount of bytes moved
> > by ttm within a time period.  We defined two type of bandwidth: burst
> > and average.  Average bandwidth is calculated by dividing the total
> > amount of bytes moved within a cgroup by the lifetime of the cgroup.
> > Burst bandwidth is similar except that the byte and time measurement is
> > reset after a user configurable period.
>
> So I'm not too sure exposing this is a great idea, at least depending upon
> what you're trying to do with it. There's a few concerns here:
>
> - I think bo movement stats might be useful, but they're not telling you
>   everything. Applications can also copy data themselves and put buffers
>   where they want them, especially with more explicit apis like vk.
>
> - which kind of moves are we talking about here? Eviction related bo moves
>   seem not counted here, and if you have lots of gpus with funny
>   interconnects you might also get other kinds of moves, not just system
>   ram <-> vram.
Eviction moves are counted, but I think I placed the delay in the wrong
place (the tracking of bytes moved is in a previous patch in
ttm_bo_handle_move_mem, which is common to all moves as far as I can
tell.)

> - What happens if we slow down, but someone else needs to evict our
>   buffers/move them (ttm is atm not great at this, but Christian König is
>   working on patches). I think there's lots of priority inversion
>   potential here.
>
> - If the goal is to avoid thrashing the interconnects, then this isn't the
>   full picture by far - apps can use copy engines and explicit placement,
>   again that's how vulkan at least is supposed to work.
>
> I guess these all boil down to: What do you want to achieve here? The
> commit message doesn't explain the intended use-case of this.
Thrashing prevention is the intent.  I am not familiar with Vulkan so
I will have to get back to you on that.  I don't know how those
explicit placements translate into the kernel.  At this stage, I think
it's still worthwhile to have this as a resource even if some
applications bypass the kernel.  I certainly welcome more feedback on
this topic.

Regards,
Kenny

Re: [RFC PATCH v3 04/11] drm, cgroup: Add total GEM buffer allocation limit

2019-06-26 Thread Kenny Ho
On Wed, Jun 26, 2019 at 5:41 PM Daniel Vetter  wrote:
> On Wed, Jun 26, 2019 at 05:27:48PM -0400, Kenny Ho wrote:
> > On Wed, Jun 26, 2019 at 12:05 PM Daniel Vetter  wrote:
> > > So what happens when you start a lot of threads all at the same time,
> > > allocating gem bo? Also would be nice if we could roll out at least the
> > > accounting part of this cgroup to all GEM drivers.
> >
> > When there is a large number of allocation, the allocation will be
> > checked in sequence within a device (since I used a per device mutex
> > in the check.)  Are you suggesting the overhead here is significant
> > enough to be a bottleneck?  The accounting part should be available to
> > all GEM drivers (unless I missed something) since the chg and unchg
> > function is called via the generic drm_gem_private_object_init and
> > drm_gem_object_release.
>
> thread 1: checks limits, still under the total
>
> thread 2: checks limits, still under the total
>
> thread 1: allocates, still under
>
> thread 2: allocates, now over the limit
>
> I think the check and chg need to be one step, or this wont work. Or I'm
> missing something somewhere.

Ok, I see what you are saying.
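
For the next revision, a minimal sketch of folding the check and the
charge into one critical section (the helper, field and lock names here
are assumptions for illustration, not the RFC's exact ones):

/* Illustrative sketch: check and charge under the same per-device lock. */
static int drmcgrp_try_chg(struct drmcgrp_device_resource *ddr,
			   struct mutex *dev_lock, s64 size)
{
	int ret = 0;

	mutex_lock(dev_lock);
	if (ddr->bo_stats_total + size > ddr->bo_limits_total)
		ret = -ENOMEM;			/* would exceed the cgroup limit */
	else
		ddr->bo_stats_total += size;	/* charge in the same step */
	mutex_unlock(dev_lock);

	return ret;
}

The allocation path would then just fail with -ENOMEM when the helper
does, so two racing threads can no longer both pass the check and then
both allocate past the limit.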

> Wrt rolling out the accounting for all drivers: Since you also roll out
> enforcement in this patch I'm not sure whether the accounting part is
> fully stand-alone. And as discussed a bit on an earlier patch, I think for
> DRIVER_GEM we should set up the accounting cgroup automatically.
I think I should be able to split the commit and restructure things a bit.

> > > What's the underlying technical reason for not allowing sharing across
> > > cgroups?
> > To be clear, sharing across cgroup is allowed, the buffer just needs
> > to be allocated by a process that is parent to the cgroup.  So in the
> > case of xorg allocating buffer for client, the xorg would be in the
> > root cgroup and the buffer can be passed around by different clients
> > (in root or other cgroup.)  The idea here is to establish some form of
> > ownership, otherwise there wouldn't be a way to account for or limit
> > the usage.
>
> But why? What's the problem if I allocate something and then hand it to
> someone else. E.g. one popular use of cgroups is to isolate clients, so
> maybe you'd do a cgroup + namespace for each X11 client (ok wayland, with
> X11 this is probably pointless).
>
> But with your current limitation those clients can't pass buffers to the
> compositor anymore, making cgroups useless. Your example here only works
> if Xorg is in the root and allocates all the buffers. That's not even true
> for DRI3 anymore.
>
> So pretty serious limitation on cgroups, and I'm not really understanding
> why we need this. I think if we want to prevent buffer sharing, what we
> need are some selinux hooks and stuff so you can prevent an import/access
> by someone who's not allowed to touch a buffer. But that kind of access
> right management should be separate from resource control imo.
So without the sharing restriction and some kind of ownership
structure, we will have to migrate/change the owner of the buffer when
the cgroup that created the buffer dies before the receiving cgroup(s),
and I am not sure how to do that properly at the moment.  1) Should
each cgroup keep track of all the buffers that belong to it and
migrate?  (Is that efficient?)  2) Which cgroup should be the new
owner (and therefore have the limit applied)?  Having the creator
be the owner is kind of natural, but let's say the buffer is shared
with 5 other cgroups; which of these 5 cgroups should be the new owner
of the buffer?

Regards,
Kenny

Re: [RFC PATCH v3 11/11] drm, cgroup: Allow more aggressive memory reclaim

2019-06-26 Thread Kenny Ho
Ok.  I am not too familiar with the shrinker API but I will dig into it.
Just so that I am looking into the right things, you are referring to
things like struct shrinker and struct shrink_control?
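
For reference, the core interface as I understand it looks roughly like
this (a minimal sketch of registering one, not yet tied to ttm or the
cgroup controller; the drmcg_* names are placeholders):

/* Minimal sketch of a shrinker registration, for reference only. */
static unsigned long drmcg_shrink_count(struct shrinker *s,
					struct shrink_control *sc)
{
	/* report roughly how many objects could be freed */
	return 0;				/* placeholder */
}

static unsigned long drmcg_shrink_scan(struct shrinker *s,
				       struct shrink_control *sc)
{
	/* free up to sc->nr_to_scan objects, return how many were freed */
	return SHRINK_STOP;			/* placeholder: nothing freed */
}

static struct shrinker drmcg_shrinker = {
	.count_objects	= drmcg_shrink_count,
	.scan_objects	= drmcg_shrink_scan,
	.seeks		= DEFAULT_SEEKS,
	/* memcg-aware core shrinkers additionally set SHRINKER_MEMCG_AWARE in .flags */
};

/* register_shrinker(&drmcg_shrinker) at init, unregister_shrinker() on teardown */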

Regards,
Kenny

On Wed, Jun 26, 2019 at 12:44 PM Daniel Vetter  wrote:
>
> On Wed, Jun 26, 2019 at 11:05:22AM -0400, Kenny Ho wrote:
> > Allow DRM TTM memory manager to register a work_struct, such that, when
> > a drmcgrp is under memory pressure, memory reclaiming can be triggered
> > immediately.
> >
> > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
> > Signed-off-by: Kenny Ho 
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo.c| 47 +
> >  include/drm/drm_cgroup.h| 14 ++
> >  include/drm/ttm/ttm_bo_driver.h |  2 ++
> >  kernel/cgroup/drm.c | 33 +++
> >  4 files changed, 96 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 79c530f4a198..5fc3bc5bd4c5 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -1509,6 +1509,44 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, 
> > unsigned mem_type)
> >  }
> >  EXPORT_SYMBOL(ttm_bo_evict_mm);
> >
> > +static void ttm_bo_reclaim_wq(struct work_struct *work)
> > +{
>
> I think a design a bit more inspired by memcg aware core shrinkers would
> be nice, i.e. explicitly passing:
> - which drm_cgroup needs to be shrunk
> - which ttm_mem_reg (well the fancy new abstracted out stuff for tracking
>   special gpu memory resources like tt or vram or whatever)
> - how much it needs to be shrunk
>
> I think with that a lot more the book-keeping could be pushed into the
> drm_cgroup code, and the callback just needs to actually shrink enough as
> requested.
> -Daniel
>
> > + struct ttm_operation_ctx ctx = {
> > + .interruptible = false,
> > + .no_wait_gpu = false,
> > + .flags = TTM_OPT_FLAG_FORCE_ALLOC
> > + };
> > + struct ttm_mem_type_manager *man =
> > + container_of(work, struct ttm_mem_type_manager, reclaim_wq);
> > + struct ttm_bo_device *bdev = man->bdev;
> > + struct dma_fence *fence;
> > + int mem_type;
> > + int ret;
> > +
> > + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
> > + if (>man[mem_type] == man)
> > + break;
> > +
> > + BUG_ON(mem_type >= TTM_NUM_MEM_TYPES);
> > +
> > + if (!drmcgrp_mem_pressure_scan(bdev, mem_type))
> > + return;
> > +
> > + ret = ttm_mem_evict_first(bdev, mem_type, NULL, );
> > + if (ret)
> > + return;
> > +
> > + spin_lock(>move_lock);
> > + fence = dma_fence_get(man->move);
> > + spin_unlock(>move_lock);
> > +
> > + if (fence) {
> > + ret = dma_fence_wait(fence, false);
> > + dma_fence_put(fence);
> > + }
> > +
> > +}
> > +
> >  int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
> >   unsigned long p_size)
> >  {
> > @@ -1543,6 +1581,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, 
> > unsigned type,
> >   INIT_LIST_HEAD(>lru[i]);
> >   man->move = NULL;
> >
> > + pr_err("drmcgrp %p type %d\n", bdev->ddev, type);
> > +
> > + if (type <= TTM_PL_VRAM) {
> > + INIT_WORK(>reclaim_wq, ttm_bo_reclaim_wq);
> > + drmcgrp_register_device_mm(bdev->ddev, type, 
> > >reclaim_wq);
> > + }
> > +
> >   return 0;
> >  }
> >  EXPORT_SYMBOL(ttm_bo_init_mm);
> > @@ -1620,6 +1665,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
> >   man = >man[i];
> >   if (man->has_type) {
> >   man->use_type = false;
> > + drmcgrp_unregister_device_mm(bdev->ddev, i);
> > + cancel_work_sync(>reclaim_wq);
> >   if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) 
> > {
> >   ret = -EBUSY;
> >   pr_err("DRM memory manager type %d is not 
> > clean\n",
> > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> > index 360c1e6c809f..134d6e5475f3 100644
> > --- a/include/drm/drm_cgroup.h
> > +++ b/include/drm/drm_cgroup.h
> > @@ -5,6 +5,7 @@
>

Re: [RFC PATCH v3 09/11] drm, cgroup: Add per cgroup bw measure and control

2019-06-28 Thread Kenny Ho
On Thu, Jun 27, 2019 at 2:11 AM Daniel Vetter  wrote:
> I feel like a better approach would by to add a cgroup for the various
> engines on the gpu, and then also account all the sdma (or whatever the
> name of the amd copy engines is again) usage by ttm_bo moves to the right
> cgroup.  I think that's a more meaningful limitation. For direct thrashing
> control I think there's both not enough information available in the
> kernel (you'd need some performance counters to watch how much bandwidth
> userspace batches/CS are wasting), and I don't think the ttm eviction
> logic is ready to step over all the priority inversion issues this will
> bring up. Managing sdma usage otoh will be a lot more straightforward (but
> still has all the priority inversion problems, but in the scheduler that
> might be easier to fix perhaps with the explicit dependency graph - in the
> i915 scheduler we already have priority boosting afaiui).
My concern with hooking into the engine/lower level is that the
engine may not be process/cgroup aware.  So the bandwidth tracking is
per device.  I am also wondering if this is potentially a case
of perfect getting in the way of good.  While ttm_bo_handle_move_mem
may not track everything, it is still a key function for a lot of the
memory operations.  Also, if the programming model is designed to
bypass the kernel then I am not sure if there is anything the kernel
can do.  (Things like kernel-bypass network stacks come to mind.)  All
that said, I will certainly dig deeper into the topic.

Regards,
Kenny

Re: [RFC PATCH v3 04/11] drm, cgroup: Add total GEM buffer allocation limit

2019-06-28 Thread Kenny Ho
On Thu, Jun 27, 2019 at 5:24 PM Daniel Vetter  wrote:
> On Thu, Jun 27, 2019 at 02:42:43PM -0400, Kenny Ho wrote:
> > Um... I am going to get a bit philosophical here and suggest that the
> > idea of sharing (especially uncontrolled sharing) is inherently at odds
> > with containment.  It's like, if everybody is special, no one is
> > special.  Perhaps an alternative is to make this configurable so that
> > people can allow sharing knowing the caveat?  And just to be clear,
> > the current solution allows for sharing, even between cgroup.
>
> The thing is, why shouldn't we just allow it (with some documented
> caveat)?
>
> I mean if all people do is share it as your current patches allow, then
> there's nothing funny going on (at least if we go with just leaking the
> allocations). If we allow additional sharing, then that's a plus.
Um... perhaps I was being overly conservative :).  So let me
illustrate with an example to add more clarity and get more comments
on it.

Let's say we have the following cgroup hierarchy (the letters are
cgroups with R being the root cgroup.  The numbers in brackets are
processes.  The processes are placed with the 'No Internal Process
Constraint' in mind.)
R (4, 5) ---- A (6)
  \
   B ---- C (7,8)
    \
     D (9)

Here is a list of operations and the associated effect on the sizes
tracked by the cgroups (for simplicity, each buffer is 1 unit in size.)
With the current implementation (charge on buffer creation with a
restriction on sharing.)
R   A   B   C   D   |Ops

1   0   0   0   0   |4 allocated a buffer
1   0   0   0   0   |4 shared a buffer with 5
1   0   0   0   0   |4 shared a buffer with 9
2   0   1   0   1   |9 allocated a buffer
3   0   2   1   1   |7 allocated a buffer
3   0   2   1   1   |7 shared a buffer with 8
3   0   2   1   1   |7 sharing with 9 (not allowed)
3   0   2   1   1   |7 sharing with 4 (not allowed)
3   0   2   1   1   |7 release a buffer
2   0   1   0   1   |8 release a buffer from 7

The suggestion as I understand it (charge per buffer reference with
unrestricted sharing.)
R   A   B   C   D   |Ops

1   0   0   0   0   |4 allocated a buffer
2   0   0   0   0   |4 shared a buffer with 5
3   0   0   0   1   |4 shared a buffer with 9
4   0   1   0   2   |9 allocated a buffer
5   0   2   1   1   |7 allocated a buffer
6   0   3   2   1   |7 shared a buffer with 8
7   0   4   2   2   |7 sharing with 9
8   0   4   2   2   |7 sharing with 4
7   0   3   1   2   |7 release a buffer
6   0   2   0   2   |8 release a buffer from 7
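
A sketch of the per-reference charging the second table implies
(illustrative only; the helper and field names are assumptions):

/* Illustrative sketch: charge every cgroup from the referencing one up to root. */
static void drmcgrp_chg_bo_ref(struct drmcgrp *drmcgrp, s64 size)
{
	struct drmcgrp *cg;

	for (cg = drmcgrp; cg != NULL; cg = parent_drmcgrp(cg))
		cg->bo_total += size;	/* "bo_total" is an assumed field name */
}

(with the release path walking the same chain and subtracting.)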

Is this a correct understanding of the suggestion?

Regards,
Kenny

Re: [PATCH RFC v4 00/16] new cgroup controller for gpu/drm subsystem

2019-09-03 Thread Kenny Ho
Hi Tejun,

Thanks for looking into this.  I can definitely help where I can and I
am sure other experts will jump in if I start misrepresenting the
reality :) (as Daniel has already done.)

Regarding your points, my understanding is that there isn't really a
TTM vs GEM situation anymore (there is an lwn.net article about that,
but it is more than a decade old.)  I believe GEM is the common
interface at this point and more and more features are being
refactored into it.  For example, AMD's driver uses TTM internally but
things are exposed via the GEM interface.

This GEM resource is actually the single number resource you just
referred to.  A GEM buffer (the drm.buffer.* resources) can be backed
by VRAM, system memory or other types of memory.  The finer-grained
control is the drm.memory.* resources, which still need more
discussion.  (As some of the functionalities in TTM are being
refactored into the GEM level.  I have seen some patches that make TTM
a subclass of GEM.)

This RFC can be grouped into 3 areas and they are fairly independent
so they can be reviewed separately: high level device memory control
(buffer.*), fine-grained memory control and bandwidth (memory.*) and
compute resources (lgpu.*).  I think the memory.* resources are the
most controversial part but I think they are still needed.

Perhaps an analogy may help.  For a system, we have CPUs and memory.
And within memory, it can be backed by RAM or swap.  For GPU, each
device can have LGPUs and buffers.  And within the buffers, it can be
backed by VRAM, or system RAM or even swap.

As for setting the right amount, I think that's where the profiling
aspect of the *.stats comes in.  And while one can't necessarily buy
more VRAM, it is still a useful knob to adjust if the intention is to
pack more work into a GPU device with predictable performance.  This
research on various GPU workload may be of interest:

A Taxonomy of GPGPU Performance Scaling
http://www.computermachines.org/joe/posters/iiswc2015_taxonomy.pdf
http://www.computermachines.org/joe/publications/pdfs/iiswc2015_taxonomy.pdf

(summary: GPU workloads can be memory bound or compute bound.  So it's
possible to pack different workloads together to improve utilization.)

Regards,
Kenny

On Tue, Sep 3, 2019 at 2:50 PM Tejun Heo  wrote:
>
> Hello, Daniel.
>
> On Tue, Sep 03, 2019 at 09:55:50AM +0200, Daniel Vetter wrote:
> > > * While breaking up and applying control to different types of
> > >   internal objects may seem attractive to folks who work day in and
> > >   day out with the subsystem, they aren't all that useful to users and
> > >   the siloed controls are likely to make the whole mechanism a lot
> > >   less useful.  We had the same problem with cgroup1 memcg - putting
> > >   control of different uses of memory under separate knobs.  It made
> > >   the whole thing pretty useless.  e.g. if you constrain all knobs
> > >   tight enough to control the overall usage, overall utilization
> > >   suffers, but if you don't, you really don't have control over actual
> > >   usage.  For memcg, what has to be allocated and controlled is
> > >   physical memory, no matter how they're used.  It's not like you can
> > >   go buy more "socket" memory.  At least from the looks of it, I'm
> > >   afraid gpu controller is repeating the same mistakes.
> >
> > We do have quite a pile of different memories and ranges, so I don't
> > thinkt we're doing the same mistake here. But it is maybe a bit too
>
> I see.  One thing which caught my eyes was the system memory control.
> Shouldn't that be controlled by memcg?  Is there something special
> about system memory used by gpus?
>
> > complicated, and exposes stuff that most users really don't care about.
>
> Could be from me not knowing much about gpus but definitely looks too
> complex to me.  I don't see how users would be able to alloate, vram,
> system memory and GART with reasonable accuracy.  memcg on cgroup2
> deals with just single number and that's already plenty challenging.
>
> Thanks.
>
> --
> tejun

Re: [PATCH RFC v4 00/16] new cgroup controller for gpu/drm subsystem

2019-09-03 Thread Kenny Ho
On Tue, Sep 3, 2019 at 5:20 AM Daniel Vetter  wrote:
>
> On Tue, Sep 3, 2019 at 10:24 AM Koenig, Christian
>  wrote:
> >
> > Am 03.09.19 um 10:02 schrieb Daniel Vetter:
> > > On Thu, Aug 29, 2019 at 02:05:17AM -0400, Kenny Ho wrote:
> > >> With this RFC v4, I am hoping to have some consensus on a merge plan.  I 
> > >> believe
> > >> the GEM related resources (drm.buffer.*) introduced in previous RFC and,
> > >> hopefully, the logical GPU concept (drm.lgpu.*) introduced in this RFC 
> > >> are
> > >> uncontroversial and ready to move out of RFC and into a more formal 
> > >> review.  I
> > >> will continue to work on the memory backend resources (drm.memory.*).
> > >>
> > >> The cover letter from v1 is copied below for reference.
> > >>
> > >> [v1]: 
> > >> https://lists.freedesktop.org/archives/dri-devel/2018-November/197106.html
> > >> [v2]: https://www.spinics.net/lists/cgroups/msg22074.html
> > >> [v3]: 
> > >> https://lists.freedesktop.org/archives/amd-gfx/2019-June/036026.html
> > > So looking at all this doesn't seem to have changed much, and the old
> > > discussion didn't really conclude anywhere (aside from some details).
> > >
> > > One more open though that crossed my mind, having read a ton of ttm again
> > > recently: How does this all interact with ttm global limits? I'd say the
> > > ttm global limits is the ur-cgroups we have in drm, and not looking at
> > > that seems kinda bad.
> >
> > At least my hope was to completely replace ttm globals with those
> > limitations here when it is ready.
>
> You need more, at least some kind of shrinker to cut down bo placed in
> system memory when we're under memory pressure. Which drags in a
> pretty epic amount of locking lols (see i915's shrinker fun, where we
> attempt that). Probably another good idea to share at least some
> concepts, maybe even code.

I am still looking into your shrinker suggestion so the memory.*
resources are unchanged from RFC v3.  The main change for the buffer.*
resources is the removal of the buffer sharing restriction, as you
suggested, and additional documentation of that behaviour.  (I may have
neglected to mention it in the cover.)  The other key part of RFC v4
is the "logical GPU/lgpu" concept.  I am hoping to get it out there
early for feedback while I continue to work on the memory.* parts.

Kenny

> -Daniel
>
> >
> > Christian.
> >
> > > -Daniel
> > >
> > >> v4:
> > >> Unchanged (no review needed)
> > >> * drm.memory.*/ttm resources (Patch 9-13, I am still working on memory 
> > >> bandwidth
> > >> and shrinker)
> > >> Base on feedbacks on v3:
> > >> * update nominclature to drmcg
> > >> * embed per device drmcg properties into drm_device
> > >> * split GEM buffer related commits into stats and limit
> > >> * rename function name to align with convention
> > >> * combined buffer accounting and check into a try_charge function
> > >> * support buffer stats without limit enforcement
> > >> * removed GEM buffer sharing limitation
> > >> * updated documentations
> > >> New features:
> > >> * introducing logical GPU concept
> > >> * example implementation with AMD KFD
> > >>
> > >> v3:
> > >> Base on feedbacks on v2:
> > >> * removed .help type file from v2
> > >> * conform to cgroup convention for default and max handling
> > >> * conform to cgroup convention for addressing device specific limits 
> > >> (with major:minor)
> > >> New function:
> > >> * adopted memparse for memory size related attributes
> > >> * added macro to marshall drmcgrp cftype private  (DRMCG_CTF_PRIV, etc.)
> > >> * added ttm buffer usage stats (per cgroup, for system, tt, vram.)
> > >> * added ttm buffer usage limit (per cgroup, for vram.)
> > >> * added per cgroup bandwidth stats and limiting (burst and average 
> > >> bandwidth)
> > >>
> > >> v2:
> > >> * Removed the vendoring concepts
> > >> * Add limit to total buffer allocation
> > >> * Add limit to the maximum size of a buffer allocation
> > >>
> > >> v1: cover letter

Re: [PATCH RFC v4 01/16] drm: Add drm_minor_for_each

2019-09-03 Thread Kenny Ho
On Tue, Sep 3, 2019 at 3:57 AM Daniel Vetter  wrote:
>
> On Thu, Aug 29, 2019 at 02:05:18AM -0400, Kenny Ho wrote:
> > To allow other subsystems to iterate through all stored DRM minors and
> > act upon them.
> >
> > Also exposes drm_minor_acquire and drm_minor_release for other subsystem
> > to handle drm_minor.  DRM cgroup controller is the initial consumer of
> > this new features.
> >
> > Change-Id: I7c4b67ce6b31f06d1037b03435386ff5b8144ca5
> > Signed-off-by: Kenny Ho 
>
> Iterating over minors for cgroups sounds very, very wrong. Why do we care
> whether a buffer was allocated through kms dumb vs render nodes?
>
> I'd expect all the cgroup stuff to only work on drm_device, if it does
> care about devices.
>
> (I didn't look through the patch series to find out where exactly you're
> using this, so maybe I'm off the rails here).

I am exposing this to remove the need to keep track of a separate list
of available drm_devices in the system (to remove the registering and
unregistering of drm_device with the cgroup subsystem and just use
drm_minor as the single source of truth.)  I am only filtering out the
render node minors because they point to the same drm_device and it is
confusing.

Perhaps I missed an obvious way to list the drm devices without
iterating through the drm_minors?  (I probably jumped to the minors
because $major:$minor is the convention to address devices in cgroup.)

Kenny

> -Daniel
>
> > ---
> >  drivers/gpu/drm/drm_drv.c  | 19 +++
> >  drivers/gpu/drm/drm_internal.h |  4 
> >  include/drm/drm_drv.h  |  4 
> >  3 files changed, 23 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> > index 862621494a93..000cddabd970 100644
> > --- a/drivers/gpu/drm/drm_drv.c
> > +++ b/drivers/gpu/drm/drm_drv.c
> > @@ -254,11 +254,13 @@ struct drm_minor *drm_minor_acquire(unsigned int 
> > minor_id)
> >
> >   return minor;
> >  }
> > +EXPORT_SYMBOL(drm_minor_acquire);
> >
> >  void drm_minor_release(struct drm_minor *minor)
> >  {
> >   drm_dev_put(minor->dev);
> >  }
> > +EXPORT_SYMBOL(drm_minor_release);
> >
> >  /**
> >   * DOC: driver instance overview
> > @@ -1078,6 +1080,23 @@ int drm_dev_set_unique(struct drm_device *dev, const 
> > char *name)
> >  }
> >  EXPORT_SYMBOL(drm_dev_set_unique);
> >
> > +/**
> > + * drm_minor_for_each - Iterate through all stored DRM minors
> > + * @fn: Function to be called for each pointer.
> > + * @data: Data passed to callback function.
> > + *
> > + * The callback function will be called for each @drm_minor entry, passing
> > + * the minor, the entry and @data.
> > + *
> > + * If @fn returns anything other than %0, the iteration stops and that
> > + * value is returned from this function.
> > + */
> > +int drm_minor_for_each(int (*fn)(int id, void *p, void *data), void *data)
> > +{
> > + return idr_for_each(&drm_minors_idr, fn, data);
> > +}
> > +EXPORT_SYMBOL(drm_minor_for_each);
> > +
> >  /*
> >   * DRM Core
> >   * The DRM core module initializes all global DRM objects and makes them
> > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> > index e19ac7ca602d..6bfad76f8e78 100644
> > --- a/drivers/gpu/drm/drm_internal.h
> > +++ b/drivers/gpu/drm/drm_internal.h
> > @@ -54,10 +54,6 @@ void drm_prime_destroy_file_private(struct 
> > drm_prime_file_private *prime_fpriv);
> >  void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private 
> > *prime_fpriv,
> >   struct dma_buf *dma_buf);
> >
> > -/* drm_drv.c */
> > -struct drm_minor *drm_minor_acquire(unsigned int minor_id);
> > -void drm_minor_release(struct drm_minor *minor);
> > -
> >  /* drm_vblank.c */
> >  void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int 
> > pipe);
> >  void drm_vblank_cleanup(struct drm_device *dev);
> > diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
> > index 68ca736c548d..24f8d054c570 100644
> > --- a/include/drm/drm_drv.h
> > +++ b/include/drm/drm_drv.h
> > @@ -799,5 +799,9 @@ static inline bool drm_drv_uses_atomic_modeset(struct 
> > drm_device *dev)
> >
> >  int drm_dev_set_unique(struct drm_device *dev, const char *name);
> >
> > +int drm_minor_for_each(int (*fn)(int id, void *p, void *data), void *data);
> > +
> > +struct drm_minor *drm_minor_acquire(unsigned int minor_id);
> > +void drm_minor_release(struct drm_minor *minor);
> >
> >  #endif
> > --
> > 2.22.0
> >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

Re: [PATCH RFC v4 01/16] drm: Add drm_minor_for_each

2019-09-03 Thread Kenny Ho
On Tue, Sep 3, 2019 at 4:12 PM Daniel Vetter  wrote:
> On Tue, Sep 3, 2019 at 9:45 PM Kenny Ho  wrote:
> > On Tue, Sep 3, 2019 at 3:57 AM Daniel Vetter  wrote:
> > > Iterating over minors for cgroups sounds very, very wrong. Why do we care
> > > whether a buffer was allocated through kms dumb vs render nodes?
> > >
> > > I'd expect all the cgroup stuff to only work on drm_device, if it does
> > > care about devices.
> > >
> > > (I didn't look through the patch series to find out where exactly you're
> > > using this, so maybe I'm off the rails here).
> >
> > I am exposing this to remove the need to keep track of a separate list
> > of available drm_device in the system (to remove the registering and
> > unregistering of drm_device to the cgroup subsystem and just use
> > drm_minor as the single source of truth.)  I am only filtering out the
> > render nodes minor because they point to the same drm_device and is
> > confusing.
> >
> > Perhaps I missed an obvious way to list the drm devices without
> > iterating through the drm_minors?  (I probably jumped to the minors
> > because $major:$minor is the convention to address devices in cgroup.)
>
> Create your own if there's nothing, because you need to anyway:
> - You need special locking anyway, we can't just block on the idr lock
> for everything.
> - This needs to refcount drm_device, no the minors.
>
> Iterating over stuff still feels kinda wrong still, because normally
> the way we register/unregister userspace api (and cgroups isn't
> anything else from a drm driver pov) is by adding more calls to
> drm_dev_register/unregister. If you put a drm_cg_register/unregister
> call in there we have a clean separation, and you can track all the
> currently active devices however you want. Iterating over objects that
> can be hotunplugged any time tends to get really complicated really
> quickly.

Um... I thought this is what I had previously.  Did I misunderstand
your feedback from v3?  Doesn't drm_minor already include all these
facilities, so isn't creating my own kind of reinventing the wheel?
(as I did previously?)  drm_minor_register is called inside
drm_dev_register, so isn't leveraging the existing drm_minor facilities
a much better solution?

Kenny

>
>
> >
> > Kenny
> >
> > > -Daniel
> > >
> > > > ---
> > > >  drivers/gpu/drm/drm_drv.c  | 19 +++
> > > >  drivers/gpu/drm/drm_internal.h |  4 
> > > >  include/drm/drm_drv.h  |  4 
> > > >  3 files changed, 23 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> > > > index 862621494a93..000cddabd970 100644
> > > > --- a/drivers/gpu/drm/drm_drv.c
> > > > +++ b/drivers/gpu/drm/drm_drv.c
> > > > @@ -254,11 +254,13 @@ struct drm_minor *drm_minor_acquire(unsigned int 
> > > > minor_id)
> > > >
> > > >   return minor;
> > > >  }
> > > > +EXPORT_SYMBOL(drm_minor_acquire);
> > > >
> > > >  void drm_minor_release(struct drm_minor *minor)
> > > >  {
> > > >   drm_dev_put(minor->dev);
> > > >  }
> > > > +EXPORT_SYMBOL(drm_minor_release);
> > > >
> > > >  /**
> > > >   * DOC: driver instance overview
> > > > @@ -1078,6 +1080,23 @@ int drm_dev_set_unique(struct drm_device *dev, 
> > > > const char *name)
> > > >  }
> > > >  EXPORT_SYMBOL(drm_dev_set_unique);
> > > >
> > > > +/**
> > > > + * drm_minor_for_each - Iterate through all stored DRM minors
> > > > + * @fn: Function to be called for each pointer.
> > > > + * @data: Data passed to callback function.
> > > > + *
> > > > + * The callback function will be called for each @drm_minor entry, 
> > > > passing
> > > > + * the minor, the entry and @data.
> > > > + *
> > > > + * If @fn returns anything other than %0, the iteration stops and that
> > > > + * value is returned from this function.
> > > > + */
> > > > +int drm_minor_for_each(int (*fn)(int id, void *p, void *data), void 
> > > > *data)
> > > > +{
> > > > + return idr_for_each(_minors_idr, fn, data);
> > > > +}
> > > > +EXPORT_SYMBOL(drm_minor_for_each);
> > > > +
> > > >  /*
> > > >   * DRM Core
> > > >   * The DRM core module initializes all global DRM obj

Re: [PATCH RFC v4 13/16] drm, cgroup: Allow more aggressive memory reclaim

2019-08-29 Thread Kenny Ho
Thanks for the feedback Christian.  I am still digging into this one.
Daniel suggested leveraging the Shrinker API for the functionality of this
commit in RFC v3 but I am still trying to figure out how/if ttm fits with
the shrinker (though the idea behind the shrinker API seems fairly
straightforward as far as I understand it currently.)

Regards,
Kenny

On Thu, Aug 29, 2019 at 3:08 AM Koenig, Christian 
wrote:

> Am 29.08.19 um 08:05 schrieb Kenny Ho:
> > Allow DRM TTM memory manager to register a work_struct, such that, when
> > a drmcgrp is under memory pressure, memory reclaiming can be triggered
> > immediately.
> >
> > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
> > Signed-off-by: Kenny Ho 
> > ---
> >   drivers/gpu/drm/ttm/ttm_bo.c| 49 +
> >   include/drm/drm_cgroup.h| 16 +++
> >   include/drm/ttm/ttm_bo_driver.h |  2 ++
> >   kernel/cgroup/drm.c | 30 
> >   4 files changed, 97 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index d7e3d3128ebb..72efae694b7e 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev,
> unsigned mem_type)
> >   }
> >   EXPORT_SYMBOL(ttm_bo_evict_mm);
> >
> > +static void ttm_bo_reclaim_wq(struct work_struct *work)
> > +{
> > + struct ttm_operation_ctx ctx = {
> > + .interruptible = false,
> > + .no_wait_gpu = false,
> > + .flags = TTM_OPT_FLAG_FORCE_ALLOC
> > + };
> > + struct ttm_mem_type_manager *man =
> > + container_of(work, struct ttm_mem_type_manager, reclaim_wq);
> > + struct ttm_bo_device *bdev = man->bdev;
> > + struct dma_fence *fence;
> > + int mem_type;
> > + int ret;
> > +
> > + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
> > + if (>man[mem_type] == man)
> > + break;
> > +
> > + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES);
> > + if (mem_type >= TTM_NUM_MEM_TYPES)
> > + return;
> > +
> > + if (!drmcg_mem_pressure_scan(bdev, mem_type))
> > + return;
> > +
> > + ret = ttm_mem_evict_first(bdev, mem_type, NULL, , NULL);
> > + if (ret)
> > + return;
> > +
> > + spin_lock(>move_lock);
> > + fence = dma_fence_get(man->move);
> > + spin_unlock(>move_lock);
> > +
> > + if (fence) {
> > + ret = dma_fence_wait(fence, false);
> > + dma_fence_put(fence);
> > + }
>
> Why do you want to block for the fence here? That is a rather bad idea
> and would break pipe-lining.
>
> Apart from that I don't think we should put that into TTM.
>
> Instead drmcg_register_device_mm() should get a function pointer which
> is called from a work item when the group is under pressure.
>
> TTM can then provide the function to be called, but the actual
> registration is the job of the device and not TTM.
>
> Regards,
> Christian.
>
> > +
> > +}
> > +
> >   int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
> >   unsigned long p_size)
> >   {
> > @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev,
> unsigned type,
> >   INIT_LIST_HEAD(>lru[i]);
> >   man->move = NULL;
> >
> > + pr_err("drmcg %p type %d\n", bdev->ddev, type);
> > +
> > + if (type <= TTM_PL_VRAM) {
> > + INIT_WORK(>reclaim_wq, ttm_bo_reclaim_wq);
> > + drmcg_register_device_mm(bdev->ddev, type,
> >reclaim_wq);
> > + }
> > +
> >   return 0;
> >   }
> >   EXPORT_SYMBOL(ttm_bo_init_mm);
> > @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device
> *bdev)
> >   man = >man[i];
> >   if (man->has_type) {
> >   man->use_type = false;
> > + drmcg_unregister_device_mm(bdev->ddev, i);
> > + cancel_work_sync(>reclaim_wq);
> >   if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev,
> i)) {
> >   ret = -EBUSY;
> >   pr_err("DRM memory manager type %d is not
> clean\n",
> > diff --git a/include/drm/drm_cgroup.h b/include/drm/

Re: [PATCH RFC v4 13/16] drm, cgroup: Allow more aggressive memory reclaim

2019-08-29 Thread Kenny Ho
Yes, and I think it has quite a lot of coupling with mm's page and
pressure mechanisms.  My current thought is to just copy the API but
have a separate implementation of "ttm_shrinker" and
"ttm_shrinker_control" or something like that.  I am certainly happy
to listen to additional feedback and suggestions.

Regards,
Kenny
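
[Aside, not part of the original thread: the stock kernel Shrinker API that
Daniel suggested looks roughly like the sketch below; a hypothetical
"ttm_shrinker" would fill in the two callbacks.  The names and the reclaim
logic here are placeholders, not code from this series.]

        #include <linux/shrinker.h>

        static unsigned long ttm_shrinker_count(struct shrinker *s,
                                                struct shrink_control *sc)
        {
                /* report how many objects could be reclaimed right now;
                 * for TTM this would presumably come from the LRU lists
                 * of the memory type under pressure (an assumption) */
                return 0;               /* 0 means nothing to do */
        }

        static unsigned long ttm_shrinker_scan(struct shrinker *s,
                                               struct shrink_control *sc)
        {
                /* evict up to sc->nr_to_scan objects and return how many
                 * were actually freed, or SHRINK_STOP to give up */
                return SHRINK_STOP;
        }

        static struct shrinker ttm_shrinker = {
                .count_objects  = ttm_shrinker_count,
                .scan_objects   = ttm_shrinker_scan,
                .seeks          = DEFAULT_SEEKS,
        };

        /* registered once, e.g. from device or module init */
        int ttm_shrinker_init(void)
        {
                return register_shrinker(&ttm_shrinker);
        }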


On Thu, Aug 29, 2019 at 10:12 AM Koenig, Christian
 wrote:
>
> Yeah, that's a really good idea as well.
>
> The problem with the shrinker API is that it only applies to system memory 
> currently.
>
> So you won't have a distinction of which domain you need to evict stuff from.
>
> Regards,
> Christian.
>
> Am 29.08.19 um 16:07 schrieb Kenny Ho:
>
> Thanks for the feedback, Christian.  I am still digging into this one.  Daniel 
> suggested leveraging the Shrinker API for the functionality of this commit in 
> RFC v3, but I am still trying to figure out how/if TTM fits with a shrinker 
> (though the idea behind the shrinker API seems fairly straightforward as far 
> as I understand it currently.)
>
> Regards,
> Kenny
>
> On Thu, Aug 29, 2019 at 3:08 AM Koenig, Christian  
> wrote:
>>
>> Am 29.08.19 um 08:05 schrieb Kenny Ho:
>> > Allow DRM TTM memory manager to register a work_struct, such that, when
>> > a drmcgrp is under memory pressure, memory reclaiming can be triggered
>> > immediately.
>> >
>> > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
>> > Signed-off-by: Kenny Ho 
>> > ---
>> >   drivers/gpu/drm/ttm/ttm_bo.c| 49 +
>> >   include/drm/drm_cgroup.h| 16 +++
>> >   include/drm/ttm/ttm_bo_driver.h |  2 ++
>> >   kernel/cgroup/drm.c | 30 
>> >   4 files changed, 97 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> > index d7e3d3128ebb..72efae694b7e 100644
>> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> > @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, 
>> > unsigned mem_type)
>> >   }
>> >   EXPORT_SYMBOL(ttm_bo_evict_mm);
>> >
>> > +static void ttm_bo_reclaim_wq(struct work_struct *work)
>> > +{
>> > + struct ttm_operation_ctx ctx = {
>> > + .interruptible = false,
>> > + .no_wait_gpu = false,
>> > + .flags = TTM_OPT_FLAG_FORCE_ALLOC
>> > + };
>> > + struct ttm_mem_type_manager *man =
>> > + container_of(work, struct ttm_mem_type_manager, reclaim_wq);
>> > + struct ttm_bo_device *bdev = man->bdev;
>> > + struct dma_fence *fence;
>> > + int mem_type;
>> > + int ret;
>> > +
>> > + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
>> > + if (>man[mem_type] == man)
>> > + break;
>> > +
>> > + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES);
>> > + if (mem_type >= TTM_NUM_MEM_TYPES)
>> > + return;
>> > +
>> > + if (!drmcg_mem_pressure_scan(bdev, mem_type))
>> > + return;
>> > +
>> > + ret = ttm_mem_evict_first(bdev, mem_type, NULL, , NULL);
>> > + if (ret)
>> > + return;
>> > +
>> > + spin_lock(>move_lock);
>> > + fence = dma_fence_get(man->move);
>> > + spin_unlock(>move_lock);
>> > +
>> > + if (fence) {
>> > + ret = dma_fence_wait(fence, false);
>> > + dma_fence_put(fence);
>> > + }
>>
>> Why do you want to block for the fence here? That is a rather bad idea
>> and would break pipe-lining.
>>
>> Apart from that I don't think we should put that into TTM.
>>
>> Instead drmcg_register_device_mm() should get a function pointer which
>> is called from a work item when the group is under pressure.
>>
>> TTM can then provide the function to be called, but the actual
>> registration is the job of the device and not TTM.
>>
>> Regards,
>> Christian.
>>
>> > +
>> > +}
>> > +
>> >   int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>> >   unsigned long p_size)
>> >   {
>> > @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, 
>> > unsigned type,
>> >   INIT_LIST_HEAD(>lru[i]);

Re: [PATCH 3/3] drm/amdgpu: remove amdgpu_cs_try_evict

2019-09-02 Thread Kenny Ho
Ah ok, thanks for the explanation.  About the last bit, what is the
reason behind the differences in page size?  (I assume that's what you
meant by PTE?  Or is that something else?)

Regards,
Kenny

On Mon, Sep 2, 2019 at 10:31 AM Christian König
 wrote:
>
> Hi Kenny,
>
> When we do a CS we have a certain set of buffers which the submission is
> working with and are locked down while we prepare the submission.
>
> This working set consists of the buffers in the BO list as well as the
> ones in the VM, plus one or two for CSA and user fences etc.
>
> Now what can happen is that we find that we need to allocate some page
> tables during the CS and when a lot of BOs are locked down allocating a
> page table can fail because we can't evict other BOs.
>
> What this code tries to do is evict stuff from the BO list to make
> room for VM BOs, but since many more BOs are now bound to the VM this
> doesn't work any more.
>
>
> The root of the problem is that it is really tricky to figure out how
> much memory you need for the page tables in the first place. See for a
> BO in VRAM we usually need only one PTE for each 2MB, but for a BO in
> system memory we need one PTE for each 4K of memory.
>
> So what can happen is that you evict something from VRAM because you
> need room and that eviction in turn makes you need even more room.
>
> It can take a while until this reaches a stable point, so this patch set
> here switched from a dynamic approach to just assuming the worst and
> reserving some memory for page tables.
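
[Aside, not part of the original thread: with one PTE per 2 MiB for a BO in
VRAM versus one PTE per 4 KiB for a BO in system memory, a 1 GiB buffer needs
about 512 page-table entries while it sits in VRAM but about 262,144 entries
once it has been evicted to system memory -- a 512x larger page-table
footprint, which is why an eviction meant to free space can itself require
more room for page tables.]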
>
> Regards,
> Christian.
>
> Am 02.09.19 um 16:07 schrieb Kenny Ho:
> > Hey Christian,
> >
> > Can you go into a bit more detail on how and why this doesn't
> > work well anymore?  (such as its relationship with per VM BOs?)  I am
> > curious to learn more because I was reading into this chunk of code
> > earlier.  Is this something that the Shrinker API can help with?
> >
> > Regards,
> > Kenny
> >
> > On Mon, Sep 2, 2019 at 6:52 AM Christian König
> >  wrote:
> >> Trying to evict things from the current working set doesn't work that
> >> well anymore because of per VM BOs.
> >>
> >> Rely on reserving VRAM for page tables to avoid contention.
> >>
> >> Signed-off-by: Christian König 
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 -
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 71 +-
> >>   2 files changed, 1 insertion(+), 71 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> index a236213f8e8e..d1995156733e 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> @@ -478,7 +478,6 @@ struct amdgpu_cs_parser {
> >>  uint64_tbytes_moved_vis_threshold;
> >>  uint64_tbytes_moved;
> >>  uint64_tbytes_moved_vis;
> >> -   struct amdgpu_bo_list_entry *evictable;
> >>
> >>  /* user fence */
> >>  struct amdgpu_bo_list_entry uf_entry;
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >> index fd95b586b590..03182d968d3d 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >> @@ -447,75 +447,12 @@ static int amdgpu_cs_bo_validate(struct 
> >> amdgpu_cs_parser *p,
> >>  return r;
> >>   }
> >>
> >> -/* Last resort, try to evict something from the current working set */
> >> -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
> >> -   struct amdgpu_bo *validated)
> >> -{
> >> -   uint32_t domain = validated->allowed_domains;
> >> -   struct ttm_operation_ctx ctx = { true, false };
> >> -   int r;
> >> -
> >> -   if (!p->evictable)
> >> -   return false;
> >> -
> >> -   for (;>evictable->tv.head != >validated;
> >> -p->evictable = list_prev_entry(p->evictable, tv.head)) {
> >> -
> >> -   struct amdgpu_bo_list_entry *candidate = p->evictable;
> >> -   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
> >> -   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> >> -   bool update_bytes_moved_vis;
> >

Re: [PATCH 3/3] drm/amdgpu: remove amdgpu_cs_try_evict

2019-09-02 Thread Kenny Ho
Hey Christian,

Can you go into a bit more detail on how and why this doesn't
work well anymore?  (such as its relationship with per VM BOs?)  I am
curious to learn more because I was reading into this chunk of code
earlier.  Is this something that the Shrinker API can help with?

Regards,
Kenny

On Mon, Sep 2, 2019 at 6:52 AM Christian König
 wrote:
>
> Trying to evict things from the current working set doesn't work that
> well anymore because of per VM BOs.
>
> Rely on reserving VRAM for page tables to avoid contention.
>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 71 +-
>  2 files changed, 1 insertion(+), 71 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index a236213f8e8e..d1995156733e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -478,7 +478,6 @@ struct amdgpu_cs_parser {
> uint64_tbytes_moved_vis_threshold;
> uint64_tbytes_moved;
> uint64_tbytes_moved_vis;
> -   struct amdgpu_bo_list_entry *evictable;
>
> /* user fence */
> struct amdgpu_bo_list_entry uf_entry;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index fd95b586b590..03182d968d3d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -447,75 +447,12 @@ static int amdgpu_cs_bo_validate(struct 
> amdgpu_cs_parser *p,
> return r;
>  }
>
> -/* Last resort, try to evict something from the current working set */
> -static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
> -   struct amdgpu_bo *validated)
> -{
> -   uint32_t domain = validated->allowed_domains;
> -   struct ttm_operation_ctx ctx = { true, false };
> -   int r;
> -
> -   if (!p->evictable)
> -   return false;
> -
> -   for (;>evictable->tv.head != >validated;
> -p->evictable = list_prev_entry(p->evictable, tv.head)) {
> -
> -   struct amdgpu_bo_list_entry *candidate = p->evictable;
> -   struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
> -   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> -   bool update_bytes_moved_vis;
> -   uint32_t other;
> -
> -   /* If we reached our current BO we can forget it */
> -   if (bo == validated)
> -   break;
> -
> -   /* We can't move pinned BOs here */
> -   if (bo->pin_count)
> -   continue;
> -
> -   other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> -
> -   /* Check if this BO is in one of the domains we need space 
> for */
> -   if (!(other & domain))
> -   continue;
> -
> -   /* Check if we can move this BO somewhere else */
> -   other = bo->allowed_domains & ~domain;
> -   if (!other)
> -   continue;
> -
> -   /* Good we can try to move this BO somewhere else */
> -   update_bytes_moved_vis =
> -   !amdgpu_gmc_vram_full_visible(>gmc) &&
> -   amdgpu_bo_in_cpu_visible_vram(bo);
> -   amdgpu_bo_placement_from_domain(bo, other);
> -   r = ttm_bo_validate(>tbo, >placement, );
> -   p->bytes_moved += ctx.bytes_moved;
> -   if (update_bytes_moved_vis)
> -   p->bytes_moved_vis += ctx.bytes_moved;
> -
> -   if (unlikely(r))
> -   break;
> -
> -   p->evictable = list_prev_entry(p->evictable, tv.head);
> -   list_move(>tv.head, >validated);
> -
> -   return true;
> -   }
> -
> -   return false;
> -}
> -
>  static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
>  {
> struct amdgpu_cs_parser *p = param;
> int r;
>
> -   do {
> -   r = amdgpu_cs_bo_validate(p, bo);
> -   } while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
> +   r = amdgpu_cs_bo_validate(p, bo);
> if (r)
> return r;
>
> @@ -554,9 +491,6 @@ static int amdgpu_cs_list_validate(struct 
> amdgpu_cs_parser *p,
> binding_userptr = true;
> }
>
> -   if (p->evictable == lobj)
> -   p->evictable = NULL;
> -
> r = amdgpu_cs_validate(p, bo);
> if (r)
> return r;
> @@ -659,9 +593,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser 
> *p,
>   >bytes_moved_vis_threshold);
> p->bytes_moved = 0;
>  

[PATCH RFC v4 01/16] drm: Add drm_minor_for_each

2019-08-29 Thread Kenny Ho
To allow other subsystems to iterate through all stored DRM minors and
act upon them.

Also exposes drm_minor_acquire and drm_minor_release for other subsystems
to handle drm_minor.  The DRM cgroup controller is the initial consumer of
these new features.
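
A sketch of the intended use from a consumer's side (illustration only; the
callback name is made up and not part of this patch).  The callback follows
the idr_for_each() convention that drm_minor_for_each() forwards to: it
receives the idr id, the stored struct drm_minor pointer and the opaque
data argument.

        static int drmcg_init_minor(int id, void *ptr, void *data)
        {
                struct drm_minor *minor = ptr;

                /* only primary (card*) nodes are of interest here --
                 * an assumption, not a requirement of the API */
                if (!minor || minor->type != DRM_MINOR_PRIMARY)
                        return 0;

                /* ... set up per-device state for minor->dev ... */

                return 0;       /* non-zero stops the iteration */
        }

        /* somewhere in the consumer's init path */
        drm_minor_for_each(&drmcg_init_minor, NULL);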

Change-Id: I7c4b67ce6b31f06d1037b03435386ff5b8144ca5
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/drm_drv.c  | 19 +++
 drivers/gpu/drm/drm_internal.h |  4 
 include/drm/drm_drv.h  |  4 
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 862621494a93..000cddabd970 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -254,11 +254,13 @@ struct drm_minor *drm_minor_acquire(unsigned int minor_id)
 
return minor;
 }
+EXPORT_SYMBOL(drm_minor_acquire);
 
 void drm_minor_release(struct drm_minor *minor)
 {
drm_dev_put(minor->dev);
 }
+EXPORT_SYMBOL(drm_minor_release);
 
 /**
  * DOC: driver instance overview
@@ -1078,6 +1080,23 @@ int drm_dev_set_unique(struct drm_device *dev, const 
char *name)
 }
 EXPORT_SYMBOL(drm_dev_set_unique);
 
+/**
+ * drm_minor_for_each - Iterate through all stored DRM minors
+ * @fn: Function to be called for each pointer.
+ * @data: Data passed to callback function.
+ *
+ * The callback function will be called for each @drm_minor entry, passing
+ * the minor, the entry and @data.
+ *
+ * If @fn returns anything other than %0, the iteration stops and that
+ * value is returned from this function.
+ */
+int drm_minor_for_each(int (*fn)(int id, void *p, void *data), void *data)
+{
+   return idr_for_each(&drm_minors_idr, fn, data);
+}
+EXPORT_SYMBOL(drm_minor_for_each);
+
 /*
  * DRM Core
  * The DRM core module initializes all global DRM objects and makes them
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index e19ac7ca602d..6bfad76f8e78 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -54,10 +54,6 @@ void drm_prime_destroy_file_private(struct 
drm_prime_file_private *prime_fpriv);
 void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private 
*prime_fpriv,
struct dma_buf *dma_buf);
 
-/* drm_drv.c */
-struct drm_minor *drm_minor_acquire(unsigned int minor_id);
-void drm_minor_release(struct drm_minor *minor);
-
 /* drm_vblank.c */
 void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe);
 void drm_vblank_cleanup(struct drm_device *dev);
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 68ca736c548d..24f8d054c570 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -799,5 +799,9 @@ static inline bool drm_drv_uses_atomic_modeset(struct 
drm_device *dev)
 
 int drm_dev_set_unique(struct drm_device *dev, const char *name);
 
+int drm_minor_for_each(int (*fn)(int id, void *p, void *data), void *data);
+
+struct drm_minor *drm_minor_acquire(unsigned int minor_id);
+void drm_minor_release(struct drm_minor *minor);
 
 #endif
-- 
2.22.0


[PATCH RFC v4 07/16] drm, cgroup: Add total GEM buffer allocation limit

2019-08-29 Thread Kenny Ho
The drm resource being limited here is the GEM buffer objects.  User
applications allocate and free these buffers.  In addition, a process
can allocate a buffer and share it with another process.  The consumer
of a shared buffer can also outlive the allocator of the buffer.

For the purpose of cgroup accounting and limiting, ownership of the
buffer is deemed to be the cgroup to which the allocating process
belongs.  There is one cgroup limit per drm device.

The limiting functionality is added to the previous stats collection
function.  The drm_gem_private_object_init is modified to have a return
value to allow failure due to cgroup limit.

The try_chg function only fails if the DRM cgroup properties have
limit_enforced set to true for the DRM device.  This is to allow the DRM
cgroup controller to collect usage stats without enforcing the limits.
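
The intended call flow, sketched from the description above (this is an
illustration, not the literal hunk from this patch; in particular the
obj->drmcg field name is assumed from the earlier stats patches):

        int drm_gem_private_object_init(struct drm_device *dev,
                                        struct drm_gem_object *obj, size_t size)
        {
                /* charge the allocating task's cgroup before the buffer exists */
                obj->drmcg = drmcg_get(current);
                if (!drmcg_try_chg_bo_alloc(obj->drmcg, dev, size)) {
                        drmcg_put(obj->drmcg);
                        obj->drmcg = NULL;
                        return -ENOMEM;  /* over this cgroup's drm.buffer.max */
                }

                /* ... existing GEM object initialization ... */
                return 0;
        }

On free, the matching drmcg_unchg_bo_alloc() uncharges the owning cgroup,
which is why a shared buffer stays accounted to its allocator even if the
consumer outlives it.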

drm.buffer.default
A read-only flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Default limits on the total GEM buffer allocation in bytes.

drm.buffer.max
A read-write flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Per device limits on the total GEM buffer allocation in bytes.
This is a hard limit.  Attempts to allocate beyond the cgroup
limit will result in ENOMEM.  Shorthand understood by memparse
(such as k, m, g) can be used.

Set allocation limit for /dev/dri/card1 to 1GB
echo "226:1 1g" > drm.buffer.total.max

Set allocation limit for /dev/dri/card0 to 512MB
echo "226:0 512m" > drm.buffer.total.max

Change-Id: I96e0b7add4d331ed8bb267b3c9243d360c6e9903
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst|  21 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|   8 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   6 +-
 drivers/gpu/drm/drm_gem.c  |  11 +-
 include/drm/drm_cgroup.h   |   7 +-
 include/drm/drm_gem.h  |   2 +-
 include/linux/cgroup_drm.h |   1 +
 kernel/cgroup/drm.c| 221 -
 8 files changed, 260 insertions(+), 17 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 4dc72339a9b6..e8fac2684179 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1919,6 +1919,27 @@ DRM Interface Files
 
Total number of GEM buffer allocated.
 
+  drm.buffer.default
+   A read-only flat-keyed file which exists on the root cgroup.
+   Each entry is keyed by the drm device's major:minor.
+
+   Default limits on the total GEM buffer allocation in bytes.
+
+  drm.buffer.max
+   A read-write flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Per device limits on the total GEM buffer allocation in byte.
+   This is a hard limit.  Attempts in allocating beyond the cgroup
+   limit will result in ENOMEM.  Shorthand understood by memparse
+   (such as k, m, g) can be used.
+
+   Set allocation limit for /dev/dri/card1 to 1GB
+   echo "226:1 1g" > drm.buffer.total.max
+
+   Set allocation limit for /dev/dri/card0 to 512MB
+   echo "226:0 512m" > drm.buffer.total.max
+
 GEM Buffer Ownership
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c0bbd3aa0558..163a4fbf0611 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1395,6 +1395,12 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, 
unsigned int pipe,
  stime, etime, mode);
 }
 
+static void amdgpu_drmcg_custom_init(struct drm_device *dev,
+   struct drmcg_props *props)
+{
+   props->limit_enforced = true;
+}
+
 static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP | DRIVER_ATOMIC |
@@ -1431,6 +1437,8 @@ static struct drm_driver kms_driver = {
.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
.gem_prime_mmap = amdgpu_gem_prime_mmap,
 
+   .drmcg_custom_init = amdgpu_drmcg_custom_init,
+
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
.date = DRIVER_DATE,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 989b7b55cb2e..b1bd66be3e1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
@@ -454,7 +455,10 @@ static int amdgpu_bo_do_create(struct amdgpu_device *ade

[PATCH RFC v4 00/16] new cgroup controller for gpu/drm subsystem

2019-08-29 Thread Kenny Ho
 to artificially limit DRM
resources available to the applications.


Challenges

While there is common infrastructure in DRM that is shared across many vendors
(the scheduler [4], for example), there are also aspects of DRM that are vendor
specific.  To accommodate this, we borrowed the mechanism used by the cgroup to
handle different kinds of cgroup controllers.

Resources for DRM are also often device (GPU) specific instead of system
specific, and a system may contain more than one GPU.  For this, we borrowed some
of the ideas from the RDMA cgroup controller.

Approach
===
To experiment with the idea of a DRM cgroup, we would like to start with basic
accounting and statistics, then continue to iterate and add regulating
mechanisms into the driver.

[1] https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
[2] https://lists.freedesktop.org/archives/intel-gfx/2018-January/153156.html
[3] https://www.spinics.net/lists/cgroups/msg20720.html
[4] https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/scheduler
[5] https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
[6] 
https://blog.openshift.com/gpu-accelerated-sql-queries-with-postgresql-pg-strom-in-openshift-3-10/
[7] https://github.com/RadeonOpenCompute/k8s-device-plugin
[8] https://github.com/kubernetes/kubernetes/issues/52757

Kenny Ho (16):
  drm: Add drm_minor_for_each
  cgroup: Introduce cgroup for drm subsystem
  drm, cgroup: Initialize drmcg properties
  drm, cgroup: Add total GEM buffer allocation stats
  drm, cgroup: Add peak GEM buffer allocation stats
  drm, cgroup: Add GEM buffer allocation count stats
  drm, cgroup: Add total GEM buffer allocation limit
  drm, cgroup: Add peak GEM buffer allocation limit
  drm, cgroup: Add TTM buffer allocation stats
  drm, cgroup: Add TTM buffer peak usage stats
  drm, cgroup: Add per cgroup bw measure and control
  drm, cgroup: Add soft VRAM limit
  drm, cgroup: Allow more aggressive memory reclaim
  drm, cgroup: Introduce lgpu as DRM cgroup resource
  drm, cgroup: add update trigger after limit change
  drm/amdgpu: Integrate with DRM cgroup

 Documentation/admin-guide/cgroup-v2.rst   |  163 +-
 Documentation/cgroup-v1/drm.rst   |1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |   29 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |6 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |3 +
 .../amd/amdkfd/kfd_process_queue_manager.c|  140 ++
 drivers/gpu/drm/drm_drv.c |   26 +
 drivers/gpu/drm/drm_gem.c |   16 +-
 drivers/gpu/drm/drm_internal.h|4 -
 drivers/gpu/drm/ttm/ttm_bo.c  |   93 ++
 drivers/gpu/drm/ttm/ttm_bo_util.c |4 +
 include/drm/drm_cgroup.h  |  122 ++
 include/drm/drm_device.h  |7 +
 include/drm/drm_drv.h |   23 +
 include/drm/drm_gem.h |   13 +-
 include/drm/ttm/ttm_bo_api.h  |2 +
 include/drm/ttm/ttm_bo_driver.h   |   10 +
 include/linux/cgroup_drm.h|  151 ++
 include/linux/cgroup_subsys.h |4 +
 init/Kconfig  |5 +
 kernel/cgroup/Makefile|1 +
 kernel/cgroup/drm.c   | 1367 +
 25 files changed, 2193 insertions(+), 10 deletions(-)
 create mode 100644 Documentation/cgroup-v1/drm.rst
 create mode 100644 include/drm/drm_cgroup.h
 create mode 100644 include/linux/cgroup_drm.h
 create mode 100644 kernel/cgroup/drm.c

-- 
2.22.0


[PATCH RFC v4 02/16] cgroup: Introduce cgroup for drm subsystem

2019-08-29 Thread Kenny Ho
With the increased importance of machine learning, data science and
other cloud-based applications, GPUs are already in production use in
data centers today.  Existing GPU resource management is very coarse
grain, however, as sysadmins are only able to distribute workload on a
per-GPU basis.  An alternative is to use GPU virtualization (with or
without SRIOV) but it generally acts on the entire GPU instead of the
specific resources in a GPU.  With a drm cgroup controller, we can
enable alternate, fine-grain, sub-GPU resource management (in addition
to what may be available via GPU virtualization.)

Change-Id: I6830d3990f63f0c13abeba29b1d330cf28882831
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst | 18 -
 Documentation/cgroup-v1/drm.rst |  1 +
 include/linux/cgroup_drm.h  | 92 +
 include/linux/cgroup_subsys.h   |  4 ++
 init/Kconfig|  5 ++
 kernel/cgroup/Makefile  |  1 +
 kernel/cgroup/drm.c | 42 +++
 7 files changed, 161 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/cgroup-v1/drm.rst
 create mode 100644 include/linux/cgroup_drm.h
 create mode 100644 kernel/cgroup/drm.c

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 88e746074252..2936423a3fd5 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -61,8 +61,10 @@ v1 is available under Documentation/cgroup-v1/.
  5-6. Device
  5-7. RDMA
5-7-1. RDMA Interface Files
- 5-8. Misc
-   5-8-1. perf_event
+ 5-8. DRM
+   5-8-1. DRM Interface Files
+ 5-9. Misc
+   5-9-1. perf_event
  5-N. Non-normative information
5-N-1. CPU controller root cgroup process behaviour
5-N-2. IO controller root cgroup process behaviour
@@ -1889,6 +1891,18 @@ RDMA Interface Files
  ocrdma1 hca_handle=1 hca_object=23
 
 
+DRM
+---
+
+The "drm" controller regulates the distribution and accounting of
+of DRM (Direct Rendering Manager) and GPU-related resources.
+
+DRM Interface Files
+
+
+TODO
+
+
 Misc
 
 
diff --git a/Documentation/cgroup-v1/drm.rst b/Documentation/cgroup-v1/drm.rst
new file mode 100644
index ..5f5658e1f5ed
--- /dev/null
+++ b/Documentation/cgroup-v1/drm.rst
@@ -0,0 +1 @@
+Please see ../cgroup-v2.rst for details
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
new file mode 100644
index ..971166f9dd78
--- /dev/null
+++ b/include/linux/cgroup_drm.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+#ifndef _CGROUP_DRM_H
+#define _CGROUP_DRM_H
+
+#ifdef CONFIG_CGROUP_DRM
+
+#include 
+
+/**
+ * The DRM cgroup controller data structure.
+ */
+struct drmcg {
+   struct cgroup_subsys_state  css;
+};
+
+/**
+ * css_to_drmcg - get the corresponding drmcg ref from a cgroup_subsys_state
+ * @css: the target cgroup_subsys_state
+ *
+ * Return: DRM cgroup that contains the @css
+ */
+static inline struct drmcg *css_to_drmcg(struct cgroup_subsys_state *css)
+{
+   return css ? container_of(css, struct drmcg, css) : NULL;
+}
+
+/**
+ * drmcg_get - get the drmcg reference that a task belongs to
+ * @task: the target task
+ *
+ * This increase the reference count of the css that the @task belongs to
+ *
+ * Return: reference to the DRM cgroup the task belongs to
+ */
+static inline struct drmcg *drmcg_get(struct task_struct *task)
+{
+   return css_to_drmcg(task_get_css(task, drm_cgrp_id));
+}
+
+/**
+ * drmcg_put - put a drmcg reference
+ * @drmcg: the target drmcg
+ *
+ * Put a reference obtained via drmcg_get
+ */
+static inline void drmcg_put(struct drmcg *drmcg)
+{
+   if (drmcg)
+   css_put(&drmcg->css);
+}
+
+/**
+ * drmcg_parent - find the parent of a drm cgroup
+ * @cg: the target drmcg
+ *
+ * This does not increase the reference count of the parent cgroup
+ *
+ * Return: parent DRM cgroup of @cg
+ */
+static inline struct drmcg *drmcg_parent(struct drmcg *cg)
+{
+   return css_to_drmcg(cg->css.parent);
+}
+
+#else /* CONFIG_CGROUP_DRM */
+
+struct drmcg {
+};
+
+static inline struct drmcg *css_to_drmcg(struct cgroup_subsys_state *css)
+{
+   return NULL;
+}
+
+static inline struct drmcg *drmcg_get(struct task_struct *task)
+{
+   return NULL;
+}
+
+static inline void drmcg_put(struct drmcg *drmcg)
+{
+}
+
+static inline struct drmcg *drmcg_parent(struct drmcg *cg)
+{
+   return NULL;
+}
+
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* _CGROUP_DRM_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index acb77dcff3b4..ddedad809e8b 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -61,6 +61,10 @@ SUBSYS(pids)
 SUBSYS(rdma)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_DRM)
+SUBSYS(drm)
+#endif
+
 /*
  * The foll

[PATCH RFC v4 10/16] drm, cgroup: Add TTM buffer peak usage stats

2019-08-29 Thread Kenny Ho
drm.memory.peak.stats
A read-only nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

  == ==
  system Peak host memory used
  tt Peak host memory used by the device (GTT/GART)
  vram   Peak Video RAM used by the drm device
  priv   Other drm device specific memory peak usage
  == ==

Reading returns the following::

226:0 system=0 tt=0 vram=0 priv=0
226:1 system=0 tt=9035776 vram=17768448 priv=16809984
226:2 system=0 tt=9035776 vram=17768448 priv=16809984

Change-Id: I986e44533848f66411465bdd52105e78105a709a
Signed-off-by: Kenny Ho 
---
 include/linux/cgroup_drm.h |  2 ++
 kernel/cgroup/drm.c| 19 +++
 2 files changed, 21 insertions(+)

diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 4c2794c9333d..9579e2a0b71d 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -20,6 +20,7 @@ enum drmcg_res_type {
DRMCG_TYPE_BO_COUNT,
DRMCG_TYPE_MEM,
DRMCG_TYPE_MEM_EVICT,
+   DRMCG_TYPE_MEM_PEAK,
__DRMCG_TYPE_LAST,
 };
 
@@ -37,6 +38,7 @@ struct drmcg_device_resource {
s64 bo_stats_count_allocated;
 
s64 mem_stats[TTM_PL_PRIV+1];
+   s64 mem_peaks[TTM_PL_PRIV+1];
s64 mem_stats_evict;
 };
 
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 4960a8d1e8f4..899dc44722c3 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -162,6 +162,13 @@ static void drmcg_print_stats(struct drmcg_device_resource 
*ddr,
case DRMCG_TYPE_MEM_EVICT:
seq_printf(sf, "%lld\n", ddr->mem_stats_evict);
break;
+   case DRMCG_TYPE_MEM_PEAK:
+   for (i = 0; i <= TTM_PL_PRIV; i++) {
+   seq_printf(sf, "%s=%lld ", ttm_placement_names[i],
+   ddr->mem_peaks[i]);
+   }
+   seq_puts(sf, "\n");
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -443,6 +450,12 @@ struct cftype files[] = {
.private = DRMCG_CTF_PRIV(DRMCG_TYPE_MEM_EVICT,
DRMCG_FTYPE_STATS),
},
+   {
+   .name = "memory.peaks.stats",
+   .seq_show = drmcg_seq_show,
+   .private = DRMCG_CTF_PRIV(DRMCG_TYPE_MEM_PEAK,
+   DRMCG_FTYPE_STATS),
+   },
{ } /* terminate */
 };
 
@@ -617,6 +630,8 @@ void drmcg_chg_mem(struct ttm_buffer_object *tbo)
for ( ; drmcg != NULL; drmcg = drmcg_parent(drmcg)) {
ddr = drmcg->dev_resources[devIdx];
ddr->mem_stats[mem_type] += size;
+   ddr->mem_peaks[mem_type] = max(ddr->mem_peaks[mem_type],
+   ddr->mem_stats[mem_type]);
}
mutex_unlock(>drmcg_mutex);
 }
@@ -668,6 +683,10 @@ void drmcg_mem_track_move(struct ttm_buffer_object 
*old_bo, bool evict,
ddr->mem_stats[old_mem_type] -= move_in_bytes;
ddr->mem_stats[new_mem_type] += move_in_bytes;
 
+   ddr->mem_peaks[new_mem_type] = max(
+   ddr->mem_peaks[new_mem_type],
+   ddr->mem_stats[new_mem_type]);
+
if (evict)
ddr->mem_stats_evict++;
}
-- 
2.22.0


[PATCH RFC v4 03/16] drm, cgroup: Initialize drmcg properties

2019-08-29 Thread Kenny Ho
drmcg initialization involves allocating a per cgroup, per device data
structure and setting the defaults.  There are two entry points for
drmcg init:

1) When struct drmcg is created via css_alloc, initialization is done
for each device

2) When DRM devices are created after drmcgs are created
  a) Per device drmcg data structure is allocated at the beginning of
  DRM device creation such that drmcg can begin tracking usage
  statistics
  b) At the end of DRM device creation, drmcg_device_update is called in
  case device specific defaults need to be applied.

Entry point #2 usually applies to the root cgroup since it can be
created before DRM devices are available.  The drmcg controller will go
through all existing drm cgroups and initialize them with the new device
accordingly.

Change-Id: I908ee6975ea0585e4c30eafde4599f87094d8c65
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/drm_drv.c  |   7 +++
 include/drm/drm_cgroup.h   |  27 
 include/drm/drm_device.h   |   7 +++
 include/drm/drm_drv.h  |   9 +++
 include/linux/cgroup_drm.h |  13 
 kernel/cgroup/drm.c| 123 +
 6 files changed, 186 insertions(+)
 create mode 100644 include/drm/drm_cgroup.h

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 000cddabd970..94265eba68ca 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_crtc_internal.h"
 #include "drm_legacy.h"
@@ -672,6 +673,7 @@ int drm_dev_init(struct drm_device *dev,
mutex_init(>filelist_mutex);
mutex_init(>clientlist_mutex);
mutex_init(>master_mutex);
+   mutex_init(>drmcg_mutex);
 
dev->anon_inode = drm_fs_inode_new();
if (IS_ERR(dev->anon_inode)) {
@@ -708,6 +710,7 @@ int drm_dev_init(struct drm_device *dev,
if (ret)
goto err_setunique;
 
+   drmcg_device_early_init(dev);
return 0;
 
 err_setunique:
@@ -722,6 +725,7 @@ int drm_dev_init(struct drm_device *dev,
drm_fs_inode_free(dev->anon_inode);
 err_free:
put_device(dev->dev);
+   mutex_destroy(>drmcg_mutex);
mutex_destroy(>master_mutex);
mutex_destroy(>clientlist_mutex);
mutex_destroy(>filelist_mutex);
@@ -798,6 +802,7 @@ void drm_dev_fini(struct drm_device *dev)
 
put_device(dev->dev);
 
+   mutex_destroy(>drmcg_mutex);
mutex_destroy(>master_mutex);
mutex_destroy(>clientlist_mutex);
mutex_destroy(>filelist_mutex);
@@ -1008,6 +1013,8 @@ int drm_dev_register(struct drm_device *dev, unsigned 
long flags)
 dev->dev ? dev_name(dev->dev) : "virtual device",
 dev->primary->index);
 
+   drmcg_device_update(dev);
+
goto out_unlock;
 
 err_minors:
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
new file mode 100644
index ..bef9f9245924
--- /dev/null
+++ b/include/drm/drm_cgroup.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+#ifndef __DRM_CGROUP_H__
+#define __DRM_CGROUP_H__
+
+/**
+ * Per DRM device properties for DRM cgroup controller for the purpose
+ * of storing per device defaults
+ */
+struct drmcg_props {
+};
+
+#ifdef CONFIG_CGROUP_DRM
+
+void drmcg_device_update(struct drm_device *device);
+void drmcg_device_early_init(struct drm_device *device);
+#else
+static inline void drmcg_device_update(struct drm_device *device)
+{
+}
+
+static inline void drmcg_device_early_init(struct drm_device *device)
+{
+}
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* __DRM_CGROUP_H__ */
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index 7f9ef709b2b6..5d7d779a5083 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 struct drm_driver;
 struct drm_minor;
@@ -304,6 +305,12 @@ struct drm_device {
 */
struct drm_fb_helper *fb_helper;
 
+/** \name DRM Cgroup */
+   /*@{ */
+   struct mutex drmcg_mutex;
+   struct drmcg_props drmcg_props;
+   /*@} */
+
/* Everything below here is for legacy driver, never use! */
/* private: */
 #if IS_ENABLED(CONFIG_DRM_LEGACY)
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 24f8d054c570..c8a37a08d98d 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -660,6 +660,15 @@ struct drm_driver {
struct drm_device *dev,
uint32_t handle);
 
+   /**
+* @drmcg_custom_init
+*
+* Optional callback used to initialize drm cgroup per device properties
+* such as resource limit defaults.
+*/
+   void (*drmcg_custom_init)(struct drm_device *dev,
+   struct drmcg_props *props);
+
/**

[PATCH RFC v4 13/16] drm, cgroup: Allow more aggressive memory reclaim

2019-08-29 Thread Kenny Ho
Allow DRM TTM memory manager to register a work_struct, such that, when
a drmcgrp is under memory pressure, memory reclaiming can be triggered
immediately.

Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/ttm/ttm_bo.c| 49 +
 include/drm/drm_cgroup.h| 16 +++
 include/drm/ttm/ttm_bo_driver.h |  2 ++
 kernel/cgroup/drm.c | 30 
 4 files changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d7e3d3128ebb..72efae694b7e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned 
mem_type)
 }
 EXPORT_SYMBOL(ttm_bo_evict_mm);
 
+static void ttm_bo_reclaim_wq(struct work_struct *work)
+{
+   struct ttm_operation_ctx ctx = {
+   .interruptible = false,
+   .no_wait_gpu = false,
+   .flags = TTM_OPT_FLAG_FORCE_ALLOC
+   };
+   struct ttm_mem_type_manager *man =
+   container_of(work, struct ttm_mem_type_manager, reclaim_wq);
+   struct ttm_bo_device *bdev = man->bdev;
+   struct dma_fence *fence;
+   int mem_type;
+   int ret;
+
+   for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
+   if (>man[mem_type] == man)
+   break;
+
+   WARN_ON(mem_type >= TTM_NUM_MEM_TYPES);
+   if (mem_type >= TTM_NUM_MEM_TYPES)
+   return;
+
+   if (!drmcg_mem_pressure_scan(bdev, mem_type))
+   return;
+
+   ret = ttm_mem_evict_first(bdev, mem_type, NULL, , NULL);
+   if (ret)
+   return;
+
+   spin_lock(>move_lock);
+   fence = dma_fence_get(man->move);
+   spin_unlock(>move_lock);
+
+   if (fence) {
+   ret = dma_fence_wait(fence, false);
+   dma_fence_put(fence);
+   }
+
+}
+
 int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
unsigned long p_size)
 {
@@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned 
type,
INIT_LIST_HEAD(>lru[i]);
man->move = NULL;
 
+   pr_err("drmcg %p type %d\n", bdev->ddev, type);
+
+   if (type <= TTM_PL_VRAM) {
+   INIT_WORK(>reclaim_wq, ttm_bo_reclaim_wq);
+   drmcg_register_device_mm(bdev->ddev, type, >reclaim_wq);
+   }
+
return 0;
 }
 EXPORT_SYMBOL(ttm_bo_init_mm);
@@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
man = >man[i];
if (man->has_type) {
man->use_type = false;
+   drmcg_unregister_device_mm(bdev->ddev, i);
+   cancel_work_sync(>reclaim_wq);
if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
ret = -EBUSY;
pr_err("DRM memory manager type %d is not 
clean\n",
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index c11df388fdf2..6d9707e1eb72 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -5,6 +5,7 @@
 #define __DRM_CGROUP_H__
 
 #include 
+#include 
 #include 
 #include 
 
@@ -25,12 +26,17 @@ struct drmcg_props {
s64 mem_bw_avg_bytes_per_us_default;
 
s64 mem_highs_default[TTM_PL_PRIV+1];
+
+   struct work_struct  *mem_reclaim_wq[TTM_PL_PRIV];
 };
 
 #ifdef CONFIG_CGROUP_DRM
 
 void drmcg_device_update(struct drm_device *device);
 void drmcg_device_early_init(struct drm_device *device);
+void drmcg_register_device_mm(struct drm_device *dev, unsigned int type,
+   struct work_struct *wq);
+void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type);
 bool drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
size_t size);
 void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
@@ -53,6 +59,16 @@ static inline void drmcg_device_early_init(struct drm_device 
*device)
 {
 }
 
+static inline void drmcg_register_device_mm(struct drm_device *dev,
+   unsigned int type, struct work_struct *wq)
+{
+}
+
+static inline void drmcg_unregister_device_mm(struct drm_device *dev,
+   unsigned int type)
+{
+}
+
 static inline void drmcg_try_chg_bo_alloc(struct drmcg *drmcg,
struct drm_device *dev, size_t size)
 {
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index e1a805d65b83..529cef92bcf6 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -205,6 +205,8 @@ struct ttm_mem_type_manager {
 * Protected by @move_lock.
 */
struct dma_fence *move;
+
+   struct work_struct reclaim_wq;
 };
 
 /**

[PATCH RFC v4 06/16] drm, cgroup: Add GEM buffer allocation count stats

2019-08-29 Thread Kenny Ho
drm.buffer.count.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Total number of GEM buffer allocated.

Change-Id: Id3e1809d5fee8562e47a7d2b961688956d844ec6
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst |  6 ++
 include/linux/cgroup_drm.h  |  3 +++
 kernel/cgroup/drm.c | 22 +++---
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 8588a0ffc69d..4dc72339a9b6 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1913,6 +1913,12 @@ DRM Interface Files
 
Largest (high water mark) GEM buffer allocated in bytes.
 
+  drm.buffer.count.stats
+   A read-only flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Total number of GEM buffer allocated.
+
 GEM Buffer Ownership
 
 
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 974d390cfa4f..972f7aa975b5 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -16,6 +16,7 @@
 enum drmcg_res_type {
DRMCG_TYPE_BO_TOTAL,
DRMCG_TYPE_BO_PEAK,
+   DRMCG_TYPE_BO_COUNT,
__DRMCG_TYPE_LAST,
 };
 
@@ -27,6 +28,8 @@ struct drmcg_device_resource {
s64 bo_stats_total_allocated;
 
s64 bo_stats_peak_allocated;
+
+   s64 bo_stats_count_allocated;
 };
 
 /**
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 0bf5b95668c4..85e46ece4a82 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -132,6 +132,9 @@ static void drmcg_print_stats(struct drmcg_device_resource 
*ddr,
case DRMCG_TYPE_BO_PEAK:
seq_printf(sf, "%lld\n", ddr->bo_stats_peak_allocated);
break;
+   case DRMCG_TYPE_BO_COUNT:
+   seq_printf(sf, "%lld\n", ddr->bo_stats_count_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -186,6 +189,12 @@ struct cftype files[] = {
.private = DRMCG_CTF_PRIV(DRMCG_TYPE_BO_PEAK,
DRMCG_FTYPE_STATS),
},
+   {
+   .name = "buffer.count.stats",
+   .seq_show = drmcg_seq_show,
+   .private = DRMCG_CTF_PRIV(DRMCG_TYPE_BO_COUNT,
+   DRMCG_FTYPE_STATS),
+   },
{ } /* terminate */
 };
 
@@ -272,6 +281,8 @@ void drmcg_chg_bo_alloc(struct drmcg *drmcg, struct 
drm_device *dev,
 
if (ddr->bo_stats_peak_allocated < (s64)size)
ddr->bo_stats_peak_allocated = (s64)size;
+
+   ddr->bo_stats_count_allocated++;
}
mutex_unlock(>drmcg_mutex);
 }
@@ -289,15 +300,20 @@ EXPORT_SYMBOL(drmcg_chg_bo_alloc);
 void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
size_t size)
 {
+   struct drmcg_device_resource *ddr;
int devIdx = dev->primary->index;
 
if (drmcg == NULL)
return;
 
mutex_lock(>drmcg_mutex);
-   for ( ; drmcg != NULL; drmcg = drmcg_parent(drmcg))
-   drmcg->dev_resources[devIdx]->bo_stats_total_allocated
-   -= (s64)size;
+   for ( ; drmcg != NULL; drmcg = drmcg_parent(drmcg)) {
+   ddr = drmcg->dev_resources[devIdx];
+
+   ddr->bo_stats_total_allocated -= (s64)size;
+
+   ddr->bo_stats_count_allocated--;
+   }
mutex_unlock(>drmcg_mutex);
 }
 EXPORT_SYMBOL(drmcg_unchg_bo_alloc);
-- 
2.22.0


[PATCH RFC v4 12/16] drm, cgroup: Add soft VRAM limit

2019-08-29 Thread Kenny Ho
The drm resources being limited are the TTM (Translation Table Manager)
buffers.  TTM manages different types of memory that a GPU might access.
These memory types include dedicated Video RAM (VRAM) and host/system
memory accessible through an IOMMU (GART/GTT).  TTM is currently used by
multiple drm drivers (amd, ast, bochs, cirrus, hisilicon, mgag200,
nouveau, qxl, virtio, vmwgfx).

TTM buffers belonging to drm cgroups under memory pressure will be
selected to be evicted first.

drm.memory.high
A read-write nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

   =
  vram Video RAM soft limit for a drm device in byte
   =

Reading returns the following::

226:0 vram=0
226:1 vram=17768448
226:2 vram=17768448
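
Writing uses the same nested-key format.  Assuming the memparse shorthand
from drm.buffer.max also applies here (an assumption, it is not stated in
this commit message), setting the VRAM soft limit for /dev/dri/card1 to
256MB would be:
echo "226:1 vram=256m" > drm.memory.high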

drm.memory.default
A read-only nested-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

   ===
  vram Video RAM default limit in byte
   ===

Reading returns the following::

226:0 vram=0
226:1 vram=17768448
226:2 vram=17768448

Change-Id: I7988e28a453b53140b40a28c176239acbc81d491
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/ttm/ttm_bo.c |   7 ++
 include/drm/drm_cgroup.h |  17 +
 include/linux/cgroup_drm.h   |   2 +
 kernel/cgroup/drm.c  | 135 +++
 4 files changed, 161 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 32eee85f3641..d7e3d3128ebb 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -853,14 +853,21 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
struct ttm_bo_global *glob = bdev->glob;
struct ttm_mem_type_manager *man = >man[mem_type];
bool locked = false;
+   bool check_drmcg;
unsigned i;
int ret;
 
+   check_drmcg = drmcg_mem_pressure_scan(bdev, mem_type);
+
spin_lock(>lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, >lru[i], lru) {
bool busy;
 
+   if (check_drmcg &&
+   !drmcg_mem_should_evict(bo, mem_type))
+   continue;
+
if (!ttm_bo_evict_swapout_allowable(bo, ctx, ,
)) {
if (busy && !busy_bo &&
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 9ce0d54e6bd8..c11df388fdf2 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -6,6 +6,7 @@
 
 #include 
 #include 
+#include 
 
 /**
  * Per DRM device properties for DRM cgroup controller for the purpose
@@ -22,6 +23,8 @@ struct drmcg_props {
 
s64 mem_bw_bytes_in_period_default;
s64 mem_bw_avg_bytes_per_us_default;
+
+   s64 mem_highs_default[TTM_PL_PRIV+1];
 };
 
 #ifdef CONFIG_CGROUP_DRM
@@ -38,6 +41,8 @@ void drmcg_mem_track_move(struct ttm_buffer_object *old_bo, 
bool evict,
struct ttm_mem_reg *new_mem);
 unsigned int drmcg_get_mem_bw_period_in_us(struct ttm_buffer_object *tbo);
 bool drmcg_mem_can_move(struct ttm_buffer_object *tbo);
+bool drmcg_mem_pressure_scan(struct ttm_bo_device *bdev, unsigned int type);
+bool drmcg_mem_should_evict(struct ttm_buffer_object *tbo, unsigned int type);
 
 #else
 static inline void drmcg_device_update(struct drm_device *device)
@@ -81,5 +86,17 @@ static inline bool drmcg_mem_can_move(struct 
ttm_buffer_object *tbo)
 {
return true;
 }
+
+static inline bool drmcg_mem_pressure_scan(struct ttm_bo_device *bdev,
+   unsigned int type)
+{
+   return false;
+}
+
+static inline bool drmcg_mem_should_evict(struct ttm_buffer_object *tbo,
+   unsigned int type)
+{
+   return true;
+}
 #endif /* CONFIG_CGROUP_DRM */
 #endif /* __DRM_CGROUP_H__ */
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 27809a583bf2..c56cfe74d1a6 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -50,6 +50,8 @@ struct drmcg_device_resource {
 
s64 mem_stats[TTM_PL_PRIV+1];
s64 mem_peaks[TTM_PL_PRIV+1];
+   s64 mem_highs[TTM_PL_PRIV+1];
+   boolmem_pressure[TTM_PL_PRIV+1];
s64 mem_stats_evict;
 
s64 mem_bw_stats_last_update_us;
diff --gi

[PATCH RFC v4 16/16] drm/amdgpu: Integrate with DRM cgroup

2019-08-29 Thread Kenny Ho
The number of logical gpus (lgpu) is defined to be the number of compute
units (CUs) for a device.  The lgpu allocation limit only applies to
compute workloads for the moment (enforced via kfd queue creation).  Any
cu_mask update is validated against the availability of the compute units
as defined by the drmcg the kfd process belongs to.
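
A sketch of the subset check this implies (illustration only; the helper
name, the 1024-bit upper bound and the conversion details are assumptions,
the real check lives in pqm_drmcg_lgpu_validate() below):

        #include <linux/bitmap.h>

        static bool cu_mask_within_lgpu(const u32 *cu_mask,
                                        unsigned int cu_mask_bits,
                                        const unsigned long *lgpu_allocated,
                                        unsigned int lgpu_capacity)
        {
                /* 1024 is just an assumed upper bound on CUs per device;
                 * cu_mask_bits <= lgpu_capacity is assumed to be checked
                 * by the caller */
                DECLARE_BITMAP(requested, 1024);

                bitmap_zero(requested, 1024);
                bitmap_from_arr32(requested, cu_mask, cu_mask_bits);

                /* every CU the queue asks for must be in the set the
                 * process's drmcg has been allocated */
                return bitmap_subset(requested, lgpu_allocated, lgpu_capacity);
        }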

Change-Id: I69a57452c549173a1cd623c30dc57195b3b6563e
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  21 +++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |   6 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 +
 .../amd/amdkfd/kfd_process_queue_manager.c| 140 ++
 5 files changed, 174 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 55cb1b2094fd..369915337213 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -198,6 +198,10 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev 
*dst, struct kgd_dev *s
valid;  \
})
 
+int amdgpu_amdkfd_update_cu_mask_for_process(struct task_struct *task,
+   struct amdgpu_device *adev, unsigned long *lgpu_bitmap,
+   unsigned int nbits);
+
 /* GPUVM API */
 int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int 
pasid,
void **vm, void **process_info,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 163a4fbf0611..8abeffdd2e5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1398,9 +1398,29 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, 
unsigned int pipe,
 static void amdgpu_drmcg_custom_init(struct drm_device *dev,
struct drmcg_props *props)
 {
+   struct amdgpu_device *adev = dev->dev_private;
+
+   props->lgpu_capacity = adev->gfx.cu_info.number;
+
props->limit_enforced = true;
 }
 
+static void amdgpu_drmcg_limit_updated(struct drm_device *dev,
+   struct task_struct *task, struct drmcg_device_resource *ddr,
+   enum drmcg_res_type res_type)
+{
+   struct amdgpu_device *adev = dev->dev_private;
+
+   switch (res_type) {
+   case DRMCG_TYPE_LGPU:
+   amdgpu_amdkfd_update_cu_mask_for_process(task, adev,
+ddr->lgpu_allocated, dev->drmcg_props.lgpu_capacity);
+   break;
+   default:
+   break;
+   }
+}
+
 static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP | DRIVER_ATOMIC |
@@ -1438,6 +1458,7 @@ static struct drm_driver kms_driver = {
.gem_prime_mmap = amdgpu_gem_prime_mmap,
 
.drmcg_custom_init = amdgpu_drmcg_custom_init,
+   .drmcg_limit_updated = amdgpu_drmcg_limit_updated,
 
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 138c70454e2b..fa765b803f97 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -450,6 +450,12 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct 
kfd_process *p,
return -EFAULT;
}
 
+   if (!pqm_drmcg_lgpu_validate(p, args->queue_id, properties.cu_mask, 
cu_mask_size)) {
+   pr_debug("CU mask not permitted by DRM Cgroup");
+   kfree(properties.cu_mask);
+   return -EACCES;
+   }
+
mutex_lock(>mutex);
 
retval = pqm_set_cu_mask(>pqm, args->queue_id, );
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8b0eee5b3521..1bec7550 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1038,6 +1038,9 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
   u32 *ctl_stack_used_size,
   u32 *save_area_used_size);
 
+bool pqm_drmcg_lgpu_validate(struct kfd_process *p, int qid, u32 *cu_mask,
+   unsigned int cu_mask_size);
+
 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
unsigned int fence_value,
unsigned int timeout_ms);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 7e6c3ee82f5b..a896de290307 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -23,9 +23,11 @@
 
 #include 
 #include 
+#include 
 #include "kfd_device_queue_manager.h"
 #include "kfd_priv.h"
 #include "kfd_kernel_queue.h"
+#include "am

[PATCH RFC v4 09/16] drm, cgroup: Add TTM buffer allocation stats

2019-08-29 Thread Kenny Ho
The drm resources being measured are the TTM (Translation Table Manager)
buffers.  TTM manages different types of memory that a GPU might access.
These memory types include dedicated Video RAM (VRAM) and host/system
memory accessible through an IOMMU (GART/GTT).  TTM is currently used by
multiple drm drivers (amd, ast, bochs, cirrus, hisilicon, mgag200,
nouveau, qxl, virtio, vmwgfx).
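
The nested keys below map directly onto the standard TTM placements
(assuming ttm_placement_names[] in this series follows the usual enum
values; shown here only as a reading aid):

        static const char *ttm_placement_names[TTM_PL_PRIV + 1] = {
                [TTM_PL_SYSTEM] = "system", /* host/system memory */
                [TTM_PL_TT]     = "tt",     /* GTT/GART via IOMMU */
                [TTM_PL_VRAM]   = "vram",   /* dedicated video RAM */
                [TTM_PL_PRIV]   = "priv",   /* driver/vendor specific */
        };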

drm.memory.stats
A read-only nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

  == =
  system Host/system memory
  tt Host memory used by the drm device (GTT/GART)
  vram   Video RAM used by the drm device
  priv   Other drm device, vendor specific memory
  == =

Reading returns the following::

226:0 system=0 tt=0 vram=0 priv=0
226:1 system=0 tt=9035776 vram=17768448 priv=16809984
226:2 system=0 tt=9035776 vram=17768448 priv=16809984

drm.memory.evict.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Total number of evictions.

Change-Id: Ice2c4cc845051229549bebeb6aa2d7d6153bdf6a
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |   3 +-
 drivers/gpu/drm/ttm/ttm_bo.c|  30 +++
 drivers/gpu/drm/ttm/ttm_bo_util.c   |   4 +
 include/drm/drm_cgroup.h|  19 +
 include/drm/ttm/ttm_bo_api.h|   2 +
 include/drm/ttm/ttm_bo_driver.h |   8 ++
 include/linux/cgroup_drm.h  |   6 ++
 kernel/cgroup/drm.c | 108 
 8 files changed, 179 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index cfcbbdc39656..463e015e8694 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1720,8 +1720,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
mutex_init(>mman.gtt_window_lock);
 
/* No others user of address space so set it to 0 */
-   r = ttm_bo_device_init(>mman.bdev,
+   r = ttm_bo_device_init_tmp(>mman.bdev,
   _bo_driver,
+  adev->ddev,
   adev->ddev->anon_inode->i_mapping,
   adev->need_dma32);
if (r) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 58c403eda04e..a0e9ce46baf3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -42,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static void ttm_bo_global_kobj_release(struct kobject *kobj);
 
@@ -151,6 +153,10 @@ static void ttm_bo_release_list(struct kref *list_kref)
struct ttm_bo_device *bdev = bo->bdev;
size_t acc_size = bo->acc_size;
 
+   if (bo->bdev->ddev != NULL) // TODO: remove after ddev initiazlied for 
all
+   drmcg_unchg_mem(bo);
+   drmcg_put(bo->drmcg);
+
BUG_ON(kref_read(>list_kref));
BUG_ON(kref_read(>kref));
BUG_ON(atomic_read(>cpu_writers));
@@ -360,6 +366,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
if (bo->mem.mem_type == TTM_PL_SYSTEM) {
if (bdev->driver->move_notify)
bdev->driver->move_notify(bo, evict, mem);
+   if (bo->bdev->ddev != NULL) // TODO: remove after ddev 
initiazlied for all
+   drmcg_mem_track_move(bo, evict, mem);
bo->mem = *mem;
mem->mm_node = NULL;
goto moved;
@@ -368,6 +376,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
 
if (bdev->driver->move_notify)
bdev->driver->move_notify(bo, evict, mem);
+   if (bo->bdev->ddev != NULL) // TODO: remove after ddev initiazlied for 
all
+   drmcg_mem_track_move(bo, evict, mem);
 
if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
@@ -381,6 +391,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
if (bdev->driver->move_notify) {
swap(*mem, bo->mem);
bdev->driver->move_notify(bo, false, mem);
+   if (bo->bdev->ddev != NULL) // TODO: remove after ddev 
initiazlied for all
+

[PATCH RFC v4 05/16] drm, cgroup: Add peak GEM buffer allocation stats

2019-08-29 Thread Kenny Ho
drm.buffer.peak.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Largest (high water mark) GEM buffer allocated in bytes.

Change-Id: I79e56222151a3d33a76a61ba0097fe93ebb3449f
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst |  6 ++
 include/linux/cgroup_drm.h  |  3 +++
 kernel/cgroup/drm.c | 12 
 3 files changed, 21 insertions(+)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 0e29d136e2f9..8588a0ffc69d 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1907,6 +1907,12 @@ DRM Interface Files
 
Total GEM buffer allocation in bytes.
 
+  drm.buffer.peak.stats
+   A read-only flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Largest (high water mark) GEM buffer allocated in bytes.
+
 GEM Buffer Ownership
 
 
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 1d8a7f2cdb4e..974d390cfa4f 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -15,6 +15,7 @@
 
 enum drmcg_res_type {
DRMCG_TYPE_BO_TOTAL,
+   DRMCG_TYPE_BO_PEAK,
__DRMCG_TYPE_LAST,
 };
 
@@ -24,6 +25,8 @@ enum drmcg_res_type {
 struct drmcg_device_resource {
/* for per device stats */
s64 bo_stats_total_allocated;
+
+   s64 bo_stats_peak_allocated;
 };
 
 /**
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 87ae9164d8d8..0bf5b95668c4 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -129,6 +129,9 @@ static void drmcg_print_stats(struct drmcg_device_resource 
*ddr,
case DRMCG_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", ddr->bo_stats_total_allocated);
break;
+   case DRMCG_TYPE_BO_PEAK:
+   seq_printf(sf, "%lld\n", ddr->bo_stats_peak_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -177,6 +180,12 @@ struct cftype files[] = {
.private = DRMCG_CTF_PRIV(DRMCG_TYPE_BO_TOTAL,
DRMCG_FTYPE_STATS),
},
+   {
+   .name = "buffer.peak.stats",
+   .seq_show = drmcg_seq_show,
+   .private = DRMCG_CTF_PRIV(DRMCG_TYPE_BO_PEAK,
+   DRMCG_FTYPE_STATS),
+   },
{ } /* terminate */
 };
 
@@ -260,6 +269,9 @@ void drmcg_chg_bo_alloc(struct drmcg *drmcg, struct 
drm_device *dev,
ddr = drmcg->dev_resources[devIdx];
 
ddr->bo_stats_total_allocated += (s64)size;
+
+   if (ddr->bo_stats_peak_allocated < (s64)size)
+   ddr->bo_stats_peak_allocated = (s64)size;
}
mutex_unlock(&dev->drmcg_mutex);
 }
-- 
2.22.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH RFC v4 15/16] drm, cgroup: add update trigger after limit change

2019-08-29 Thread Kenny Ho
Before this commit, drmcg limits are updated but enforcement is delayed
until the next time the driver checks against the new limit.  While this
is sufficient for certain resources, more proactive enforcement may be
needed for others.

This commit introduces an optional drmcg_limit_updated callback for DRM
drivers.  When defined, it will be called in two scenarios:
1) When limits are updated for a particular cgroup, the callback will be
triggered for each task in the updated cgroup.
2) When a task is migrated from one cgroup to another, the callback will
be triggered for each resource type for the migrated task.
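
As an illustration only (not part of this patch), a driver that just
enforces the GEM buffer total could implement the callback roughly as
follows; foo_drmcg_limit_updated and foo_revalidate_task are hypothetical
names:

	static void foo_drmcg_limit_updated(struct drm_device *dev,
			struct task_struct *task,
			struct drmcg_device_resource *ddr,
			enum drmcg_res_type res_type)
	{
		/* only react to the resource type this driver enforces */
		if (res_type != DRMCG_TYPE_BO_TOTAL)
			return;

		/* hypothetical helper: re-check the task's outstanding
		 * allocations against the new limit */
		foo_revalidate_task(dev, task, ddr->bo_limits_total_allocated);
	}

The driver would then advertise it in its struct drm_driver:

	.drmcg_limit_updated = foo_drmcg_limit_updated,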

Change-Id: I68187a72818b855b5f295aefcb241cda8ab63b00
Signed-off-by: Kenny Ho 
---
 include/drm/drm_drv.h | 10 
 kernel/cgroup/drm.c   | 57 +++
 2 files changed, 67 insertions(+)

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index c8a37a08d98d..7e588b874a27 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -669,6 +669,16 @@ struct drm_driver {
void (*drmcg_custom_init)(struct drm_device *dev,
struct drmcg_props *props);
 
+   /**
+* @drmcg_limit_updated
+*
+* Optional callback
+*/
+   void (*drmcg_limit_updated)(struct drm_device *dev,
+   struct task_struct *task,
+   struct drmcg_device_resource *ddr,
+   enum drmcg_res_type res_type);
+
/**
 * @gem_vm_ops: Driver private ops for this object
 */
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 18c4368e2c29..99772e5d9ccc 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -621,6 +621,23 @@ static void drmcg_nested_limit_parse(struct 
kernfs_open_file *of,
}
 }
 
+static void drmcg_limit_updated(struct drm_device *dev, struct drmcg *drmcg,
+   enum drmcg_res_type res_type)
+{
+   struct drmcg_device_resource *ddr =
+   drmcg->dev_resources[dev->primary->index];
+   struct css_task_iter it;
+   struct task_struct *task;
+
+   css_task_iter_start(&drmcg->css.cgroup->self,
+   CSS_TASK_ITER_PROCS, &it);
+   while ((task = css_task_iter_next(&it))) {
+   dev->driver->drmcg_limit_updated(dev, task,
+   ddr, res_type);
+   }
+   css_task_iter_end(&it);
+}
+
 static ssize_t drmcg_limit_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
 {
@@ -726,6 +743,10 @@ static ssize_t drmcg_limit_write(struct kernfs_open_file 
*of, char *buf,
default:
break;
}
+
+   if (dm->dev->driver->drmcg_limit_updated)
+   drmcg_limit_updated(dm->dev, drmcg, type);
+
drm_dev_put(dm->dev); /* release from drm_minor_acquire */
}
 
@@ -863,9 +884,45 @@ struct cftype files[] = {
{ } /* terminate */
 };
 
+static int drmcg_attach_fn(int id, void *ptr, void *data)
+{
+   struct drm_minor *minor = ptr;
+   struct task_struct *task = data;
+   struct drm_device *dev;
+
+   if (minor->type != DRM_MINOR_PRIMARY)
+   return 0;
+
+   dev = minor->dev;
+
+   if (dev->driver->drmcg_limit_updated) {
+   struct drmcg *drmcg = drmcg_get(task);
+   struct drmcg_device_resource *ddr =
+   drmcg->dev_resources[minor->index];
+   enum drmcg_res_type type;
+
+   for (type = 0; type < __DRMCG_TYPE_LAST; type++)
+   dev->driver->drmcg_limit_updated(dev, task, ddr, type);
+
+   drmcg_put(drmcg);
+   }
+
+   return 0;
+}
+
+static void drmcg_attach(struct cgroup_taskset *tset)
+{
+   struct task_struct *task;
+   struct cgroup_subsys_state *css;
+
+   cgroup_taskset_for_each(task, css, tset)
+   drm_minor_for_each(&drmcg_attach_fn, task);
+}
+
 struct cgroup_subsys drm_cgrp_subsys = {
.css_alloc  = drmcg_css_alloc,
.css_free   = drmcg_css_free,
+   .attach = drmcg_attach,
.early_init = false,
.legacy_cftypes = files,
.dfl_cftypes= files,
-- 
2.22.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH RFC v4 08/16] drm, cgroup: Add peak GEM buffer allocation limit

2019-08-29 Thread Kenny Ho
drm.buffer.peak.default
A read-only flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Default limits on the largest GEM buffer allocation in bytes.

drm.buffer.peak.max
A read-write flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Per device limits on the largest GEM buffer allocation in bytes.
This is a hard limit.  Attempts in allocating beyond the cgroup
limit will result in ENOMEM.  Shorthand understood by memparse
(such as k, m, g) can be used.

Set largest allocation for /dev/dri/card1 to 4MB
echo "226:1 4m" > drm.buffer.peak.max

Change-Id: I0830d56775568e1cf215b56cc892d5e7945e9f25
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst | 18 ++
 include/drm/drm_cgroup.h|  1 +
 include/linux/cgroup_drm.h  |  1 +
 kernel/cgroup/drm.c | 48 +
 4 files changed, 68 insertions(+)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index e8fac2684179..87a195133eaa 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1940,6 +1940,24 @@ DRM Interface Files
Set allocation limit for /dev/dri/card0 to 512MB
echo "226:0 512m" > drm.buffer.total.max
 
+  drm.buffer.peak.default
+   A read-only flat-keyed file which exists on the root cgroup.
+   Each entry is keyed by the drm device's major:minor.
+
+   Default limits on the largest GEM buffer allocation in bytes.
+
+  drm.buffer.peak.max
+   A read-write flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Per device limits on the largest GEM buffer allocation in bytes.
+   This is a hard limit.  Attempts in allocating beyond the cgroup
+   limit will result in ENOMEM.  Shorthand understood by memparse
+   (such as k, m, g) can be used.
+
+   Set largest allocation for /dev/dri/card1 to 4MB
+   echo "226:1 4m" > drm.buffer.peak.max
+
 GEM Buffer Ownership
 
 
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 49c5d35ff6e1..d61b90beded5 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -14,6 +14,7 @@ struct drmcg_props {
boollimit_enforced;
 
s64 bo_limits_total_allocated_default;
+   s64 bo_limits_peak_allocated_default;
 };
 
 #ifdef CONFIG_CGROUP_DRM
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index eb54e56f20ae..87a2566c9fdd 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -29,6 +29,7 @@ struct drmcg_device_resource {
s64 bo_limits_total_allocated;
 
s64 bo_stats_peak_allocated;
+   s64 bo_limits_peak_allocated;
 
s64 bo_stats_count_allocated;
 };
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 7161fa40e156..2f54bff291e5 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -75,6 +75,9 @@ static inline int init_drmcg_single(struct drmcg *drmcg, 
struct drm_device *dev)
ddr->bo_limits_total_allocated =
dev->drmcg_props.bo_limits_total_allocated_default;
 
+   ddr->bo_limits_peak_allocated =
+   dev->drmcg_props.bo_limits_peak_allocated_default;
+
mutex_unlock(&dev->drmcg_mutex);
return 0;
 }
@@ -157,6 +160,9 @@ static void drmcg_print_limits(struct drmcg_device_resource 
*ddr,
case DRMCG_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", ddr->bo_limits_total_allocated);
break;
+   case DRMCG_TYPE_BO_PEAK:
+   seq_printf(sf, "%lld\n", ddr->bo_limits_peak_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -171,6 +177,10 @@ static void drmcg_print_default(struct drmcg_props *props,
seq_printf(sf, "%lld\n",
props->bo_limits_total_allocated_default);
break;
+   case DRMCG_TYPE_BO_PEAK:
+   seq_printf(sf, "%lld\n",
+   props->bo_limits_peak_allocated_default);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -327,6 +337,24 @@ static ssize_t drmcg_limit_write(struct kernfs_open_file 
*of, char *buf,
drmcg_value_apply(dm->dev,
&ddr->bo_limits_total_allocated, val);
break;
+   case DRMCG_

[PATCH RFC v4 04/16] drm, cgroup: Add total GEM buffer allocation stats

2019-08-29 Thread Kenny Ho
The drm resource being measured here is GEM buffer objects.  User
applications allocate and free these buffers.  In addition, a process
can allocate a buffer and share it with another process.  The consumer
of a shared buffer can also outlive the allocator of the buffer.

For the purpose of cgroup accounting and limiting, ownership of the
buffer is deemed to be the cgroup to which the allocating process
belongs.  There is one set of cgroup stats per drm device.  Each
allocation is charged to the owning cgroup as well as all its ancestors.

Similar to the memory cgroup, migrating a process to a different cgroup
does not move the GEM buffer usage that the process accumulated while
in the previous cgroup to the new cgroup.

The following is an example to illustrate some of the operations.  Given
the following cgroup hierarchy (The letters are cgroup names with R
being the root cgroup.  The numbers in brackets are processes.  The
processes are placed with cgroup's 'No Internal Process Constraint' in
mind, so no process is placed in cgroup B.)

R (4, 5) -- A (6)
 \
  B -- C (7,8)
   \
    D (9)

Here is a list of operations and the associated effect on the sizes
tracked by the cgroups (for simplicity, each buffer is 1 unit in size.)

==  ==  ==  ==  ==  ===
R   A   B   C   D   Ops
==  ==  ==  ==  ==  ===
1   0   0   0   0   4 allocated a buffer
1   0   0   0   0   4 shared a buffer with 5
1   0   0   0   0   4 shared a buffer with 9
2   0   1   0   1   9 allocated a buffer
3   0   2   1   1   7 allocated a buffer
3   0   2   1   1   7 shared a buffer with 8
3   0   2   1   1   7 sharing with 9
3   0   2   1   1   7 release a buffer
3   0   2   1   1   7 migrate to cgroup D
3   0   2   1   1   9 release a buffer from 7
2   0   1   0   1   8 release a buffer from 7 (last ref to shared buf)
==  ==  ==  ==  ==  ===

drm.buffer.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Total GEM buffer allocation in bytes.

Change-Id: I9d662ec50d64bb40a37dbf47f018b2f3a1c033ad
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst |  50 +-
 drivers/gpu/drm/drm_gem.c   |   9 ++
 include/drm/drm_cgroup.h|  16 +++
 include/drm/drm_gem.h   |  11 +++
 include/linux/cgroup_drm.h  |   6 ++
 kernel/cgroup/drm.c | 126 
 6 files changed, 217 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 2936423a3fd5..0e29d136e2f9 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -63,6 +63,7 @@ v1 is available under Documentation/cgroup-v1/.
5-7-1. RDMA Interface Files
  5-8. DRM
5-8-1. DRM Interface Files
+   5-8-2. GEM Buffer Ownership
  5-9. Misc
5-9-1. perf_event
  5-N. Non-normative information
@@ -1900,7 +1901,54 @@ of DRM (Direct Rendering Manager) and GPU-related 
resources.
 DRM Interface Files
 
 
-TODO
+  drm.buffer.stats
+   A read-only flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Total GEM buffer allocation in bytes.
+
+GEM Buffer Ownership
+
+
+For the purpose of cgroup accounting and limiting, ownership of the
+buffer is deemed to be the cgroup to which the allocating process
+belongs.  There is one set of cgroup stats per drm device.  Each
+allocation is charged to the owning cgroup as well as all its ancestors.
+
+Similar to the memory cgroup, migrating a process to a different cgroup
+does not move the GEM buffer usage that the process accumulated while
+in the previous cgroup to the new cgroup.
+
+The following is an example to illustrate some of the operations.  Given
+the following cgroup hierarchy (The letters are cgroup names with R
+being the root cgroup.  The numbers in brackets are processes.  The
+processes are placed with cgroup's 'No Internal Process Constraint' in
+mind, so no process is placed in cgroup B.)
+
+R (4, 5) -- A (6)
+ \
+  B -- C (7,8)
+   \
+    D (9)
+
+Here is a list of operations and the associated effect on the sizes
+tracked by the cgroups (for simplicity, each buffer is 1 unit in size.)
+
+==  ==  ==  ==  ==  ===
+R   A   B   C   D   Ops
+==  ==  ==  ==  ==  ===
+1   0   0   0   0   4 allocated a buffer
+1   0   0   0   0   4 shared a buffer with 5
+1   0   0   0   0   4 shared a buffer with 9
+2   0   1   0   1   9 allocated a buffer
+3   0   2   1   1   7 allocated a buffer
+3   0   2   1   1   7 shared a buffer with 8
+3   0   2   1   1   7 sharing with 9
+3   0   2   1   1

[PATCH RFC v4 14/16] drm, cgroup: Introduce lgpu as DRM cgroup resource

2019-08-29 Thread Kenny Ho
drm.lgpu
A read-write nested-keyed file which exists on all cgroups.
Each entry is keyed by the DRM device's major:minor.

lgpu stands for logical GPU, it is an abstraction used to
subdivide a physical DRM device for the purpose of resource
management.

The lgpu is a discrete quantity that is device specific (i.e.
some DRM devices may have 64 lgpus while others may have 100
lgpus.)  The lgpu is a single quantity with two representations
denoted by the following nested keys.

  =====  =========================================
  count  Representing lgpu as anonymous resource
  list   Representing lgpu as named resource
  =====  =========================================

For example:
226:0 count=256 list=0-255
226:1 count=4 list=0,2,4,6
226:2 count=32 list=32-63

lgpu is represented by a bitmap and uses the bitmap_parselist
kernel function so the list key input format is a
comma-separated list of decimal numbers and ranges.

Consecutively set bits are shown as two hyphen-separated decimal
numbers, the smallest and largest bit numbers set in the range.
Optionally each range can be postfixed to denote that only parts
of it should be set.  The range will be divided into groups of a
specific size.
Syntax: range:used_size/group_size
Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769

The count key is the hamming weight / hweight of the bitmap.

Both count and list accept the max and default keywords.

Some DRM devices may only support lgpu as anonymous resources.
In such case, the significance of the position of the set bits
in list will be ignored.

This lgpu resource supports the 'allocation' resource
distribution model.
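
For reference, here is a minimal sketch of how a list value can be
turned into the lgpu bitmap with bitmap_parselist (MAX_LGPU and the
helper name are illustrative assumptions, not part of this patch):

	#include <linux/bitmap.h>

	#define MAX_LGPU 1024	/* assumed device lgpu capacity */

	static int example_parse_lgpu_list(const char *buf,
			unsigned long *lgpu, int *count)
	{
		int err;

		/* e.g. buf = "0,2,4,6" (count=4 in the example above), or
		 * the grouped form "0-1023:2/256" which expands to bits
		 * 0,1,256,257,512,513,768,769 */
		err = bitmap_parselist(buf, lgpu, MAX_LGPU);
		if (err)
			return err;

		/* the count key is the hamming weight of the bitmap */
		*count = bitmap_weight(lgpu, MAX_LGPU);
		return 0;
	}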

Change-Id: I1afcacf356770930c7f925df043e51ad06ceb98e
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst |  46 
 include/drm/drm_cgroup.h|   4 +
 include/linux/cgroup_drm.h  |   6 ++
 kernel/cgroup/drm.c | 135 
 4 files changed, 191 insertions(+)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 87a195133eaa..57f18469bd76 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1958,6 +1958,52 @@ DRM Interface Files
Set largest allocation for /dev/dri/card1 to 4MB
echo "226:1 4m" > drm.buffer.peak.max
 
+  drm.lgpu
+   A read-write nested-keyed file which exists on all cgroups.
+   Each entry is keyed by the DRM device's major:minor.
+
+   lgpu stands for logical GPU, it is an abstraction used to
+   subdivide a physical DRM device for the purpose of resource
+   management.
+
+   The lgpu is a discrete quantity that is device specific (i.e.
+   some DRM devices may have 64 lgpus while others may have 100
+   lgpus.)  The lgpu is a single quantity with two representations
+   denoted by the following nested keys.
+
+ =====  =========================================
+ count  Representing lgpu as anonymous resource
+ list   Representing lgpu as named resource
+ =====  =========================================
+
+   For example:
+   226:0 count=256 list=0-255
+   226:1 count=4 list=0,2,4,6
+   226:2 count=32 list=32-63
+
+   lgpu is represented by a bitmap and uses the bitmap_parselist
+   kernel function so the list key input format is a
+   comma-separated list of decimal numbers and ranges.
+
+   Consecutively set bits are shown as two hyphen-separated decimal
+   numbers, the smallest and largest bit numbers set in the range.
+   Optionally each range can be postfixed to denote that only parts
+   of it should be set.  The range will be divided into groups of a
+   specific size.
+   Syntax: range:used_size/group_size
+   Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769
+
+   The count key is the hamming weight / hweight of the bitmap.
+
+   Both count and list accept the max and default keywords.
+
+   Some DRM devices may only support lgpu as anonymous resources.
+   In such case, the significance of the position of the set bits
+   in list will be ignored.
+
+   This lgpu resource supports the 'allocation' resource
+   distribution model.
+
 GEM Buffer Ownership
 
 
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 6d9707e1eb72..a8d6be0b075b 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -6,6 +6,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -28,6 +29,9 @@ struct drmcg_props {
s64 mem_highs_default[TTM_PL_PRIV+1

[PATCH RFC v4 11/16] drm, cgroup: Add per cgroup bw measure and control

2019-08-29 Thread Kenny Ho
The bandwidth is measured by keeping track of the amount of bytes moved
by ttm within a time period.  We define two types of bandwidth: burst
and average.  Average bandwidth is calculated by dividing the total
amount of bytes moved within a cgroup by the lifetime of the cgroup.
Burst bandwidth is similar except that the byte and time measurement is
reset after a user configurable period.

The bandwidth control is best effort since it is done on a per move
basis instead of per byte.  The bandwidth is limited by delaying the
move of a buffer.  The bandwidth limit can be exceeded when the next
move is larger than the remaining allowance.
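
As a sketch of the arithmetic implied above (using the sample stats
shown further below; integer division matches what the stats report):

	/* average bandwidth over the cgroup lifetime, in bytes per us */
	s64 avg_bytes_per_us = total_moved_byte / total_accum_us;
	/* e.g. 65518026752 / 298337721 ~= 219 for 226:2,
	 * and 45899776 / 201634590 truncates to 0 for 226:1 */

	/* burst bandwidth is the same ratio, except moved_byte and
	 * accum_us are reset every drm.burst_bw_period_in_us */
	s64 burst_byte_per_us = moved_byte / accum_us;
	/* e.g. 2244608 / 95575 ~= 23 for 226:1 */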

drm.burst_bw_period_in_us
A read-write flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Length of a period used to measure burst bandwidth in us.
One period per device.

drm.burst_bw_period_in_us.default
A read-only flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Default length of a period in us (one per device.)

drm.bandwidth.stats
A read-only nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

  =================  ========================================
  burst_byte_per_us  Burst bandwidth
  avg_bytes_per_us   Average bandwidth
  moved_byte         Amount of bytes moved within a period
  accum_us           Amount of time accumulated in a period
  total_moved_byte   Bytes moved within the cgroup lifetime
  total_accum_us     Cgroup lifetime in us
  byte_credit        Available byte credit to limit avg bw
  =================  ========================================

Reading returns the following::
226:1 burst_byte_per_us=23 avg_bytes_per_us=0 moved_byte=2244608
accum_us=95575 total_moved_byte=45899776 total_accum_us=201634590
byte_credit=13214278590464
226:2 burst_byte_per_us=10 avg_bytes_per_us=219 moved_byte=430080
accum_us=39350 total_moved_byte=65518026752 total_accum_us=298337721
byte_credit=9223372036854644735

drm.bandwidth.high
A read-write nested-keyed file which exists on all cgroups.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

  ================  ========================================
  bytes_in_period   Burst limit per period in byte
  avg_bytes_per_us  Average bandwidth limit in bytes per us
  ================  ========================================

Reading returns the following::

226:1 bytes_in_period=9223372036854775807 avg_bytes_per_us=65536
226:2 bytes_in_period=9223372036854775807 avg_bytes_per_us=65536

drm.bandwidth.default
A read-only nested-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.  The
following nested keys are defined.

  ================  ==========================================
  bytes_in_period   Default burst limit per period in byte
  avg_bytes_per_us  Default average bw limit in bytes per us
  ================  ==========================================

Reading returns the following::

226:1 bytes_in_period=9223372036854775807 avg_bytes_per_us=65536
226:2 bytes_in_period=9223372036854775807 avg_bytes_per_us=65536

Change-Id: Ie573491325ccc16535bb943e7857f43bd0962add
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/ttm/ttm_bo.c |   7 +
 include/drm/drm_cgroup.h |  19 +++
 include/linux/cgroup_drm.h   |  16 ++
 kernel/cgroup/drm.c  | 319 ++-
 4 files changed, 359 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a0e9ce46baf3..32eee85f3641 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1256,6 +1257,12 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 * Check whether we need to move buffer.
 */
if (!ttm_bo_mem_compat(placement, &bo->mem, &new_flags)) {
+   unsigned int move_delay = drmcg_get_mem_bw_period_in_us(bo);
+
+   move_delay /= 2000; /* check every half period in ms*/
+   while (bo->bdev->ddev != NULL && !drmcg_mem_can_move(bo))
+   msleep(move_delay);
+
ret = ttm_bo_move_buffer(bo, placement, ctx);
if (ret)
return ret;
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 7d63f73a5375..9ce0d54e6bd8 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.

Re: [PATCH RFC v4 01/16] drm: Add drm_minor_for_each

2019-09-05 Thread Kenny Ho
On Thu, Sep 5, 2019 at 4:06 PM Daniel Vetter  wrote:
>
> On Thu, Sep 5, 2019 at 8:28 PM Kenny Ho  wrote:
> >
> > (resent in plain text mode)
> >
> > Hi Daniel,
> >
> > This is the previous patch relevant to this discussion:
> > https://patchwork.freedesktop.org/patch/314343/
>
> Ah yes, thanks for finding that.
>
> > So before I refactored the code to leverage drm_minor, I kept my own
> > list of "known" drm_device inside the controller and have explicit
> > register and unregister function to init per device cgroup defaults.
> > For v4, I refactored the per device cgroup properties and embedded
> > them into the drm_device and continue to only use the primary minor as
> > a way to index the device as v3.
>
> I didn't really like the explicit registration step, at least for the
> basic cgroup controls (like gem buffer limits), and suggested that
> should happen automatically at drm_dev_register/unregister time. I
> also talked about picking a consistent minor (if we have to use
> minors, still would like Tejun to confirm what we should do here), but
> that was an unrelated comment. So doing auto-registration on drm_minor
> was one step too far.

How about your comments on embedding properties into drm_device?  I am
actually still not clear on the downside of using drm_minor this way.
With this implementation in v4, there isn't additional state that can
go out of sync with the ground truth of drm_device from the
perspective of drm_minor.  Wouldn't the issue with hotplugging drm
devices you described earlier get worse if the cgroup controller kept
its own list?

> Just doing a drm_cg_register/unregister pair that's called from
> drm_dev_register/unregister, and then if you want, looking up the
> right minor (I think always picking the render node makes sense for
> this, and skipping if there's no render node) would make most sense.
> At least for the basic cgroup controllers which are generic across
> drivers.

Why do we want to skip drm devices that do not have a render node
instead of just using the primary node?

Kenny



> -Daniel
>
>
>
> >
> > Regards,
> > Kenny
> >
> >
> > On Wed, Sep 4, 2019 at 4:54 AM Daniel Vetter  wrote:
> > >
> > > On Tue, Sep 03, 2019 at 04:43:45PM -0400, Kenny Ho wrote:
> > > > On Tue, Sep 3, 2019 at 4:12 PM Daniel Vetter  wrote:
> > > > > On Tue, Sep 3, 2019 at 9:45 PM Kenny Ho  wrote:
> > > > > > On Tue, Sep 3, 2019 at 3:57 AM Daniel Vetter  
> > > > > > wrote:
> > > > > > > Iterating over minors for cgroups sounds very, very wrong. Why do 
> > > > > > > we care
> > > > > > > whether a buffer was allocated through kms dumb vs render nodes?
> > > > > > >
> > > > > > > I'd expect all the cgroup stuff to only work on drm_device, if it 
> > > > > > > does
> > > > > > > care about devices.
> > > > > > >
> > > > > > > (I didn't look through the patch series to find out where exactly 
> > > > > > > you're
> > > > > > > using this, so maybe I'm off the rails here).
> > > > > >
> > > > > > I am exposing this to remove the need to keep track of a separate 
> > > > > > list
> > > > > > of available drm_device in the system (to remove the registering and
> > > > > > unregistering of drm_device to the cgroup subsystem and just use
> > > > > > drm_minor as the single source of truth.)  I am only filtering out 
> > > > > > the
> > > > > > render nodes minor because they point to the same drm_device and is
> > > > > > confusing.
> > > > > >
> > > > > > Perhaps I missed an obvious way to list the drm devices without
> > > > > > iterating through the drm_minors?  (I probably jumped to the minors
> > > > > > because $major:$minor is the convention to address devices in 
> > > > > > cgroup.)
> > > > >
> > > > > Create your own if there's nothing, because you need to anyway:
> > > > > - You need special locking anyway, we can't just block on the idr lock
> > > > > for everything.
> > > > > - This needs to refcount drm_device, no the minors.
> > > > >
> > > > > Iterating over stuff still feels kinda wrong still, because normally
> > > > > the way we register/unregister userspace api (and cgroups isn't
> > > > >

Re: [PATCH RFC v4 01/16] drm: Add drm_minor_for_each

2019-09-05 Thread Kenny Ho
Hi Daniel,

This is the previous patch relevant to this discussion:
https://patchwork.freedesktop.org/patch/314343/

So before I refactored the code to leverage drm_minor, I kept my own list
of "known" drm_device inside the controller and have explicit register and
unregister function to init per device cgroup defaults.  For v4, I
refactored the per device cgroup properties and embedded them into the
drm_device and continue to only use the primary minor as a way to index the
device as v3.

Regards,
Kenny

On Wed, Sep 4, 2019 at 4:54 AM Daniel Vetter  wrote:

> On Tue, Sep 03, 2019 at 04:43:45PM -0400, Kenny Ho wrote:
> > On Tue, Sep 3, 2019 at 4:12 PM Daniel Vetter  wrote:
> > > On Tue, Sep 3, 2019 at 9:45 PM Kenny Ho  wrote:
> > > > On Tue, Sep 3, 2019 at 3:57 AM Daniel Vetter 
> wrote:
> > > > > Iterating over minors for cgroups sounds very, very wrong. Why do
> we care
> > > > > whether a buffer was allocated through kms dumb vs render nodes?
> > > > >
> > > > > I'd expect all the cgroup stuff to only work on drm_device, if it
> does
> > > > > care about devices.
> > > > >
> > > > > (I didn't look through the patch series to find out where exactly
> you're
> > > > > using this, so maybe I'm off the rails here).
> > > >
> > > > I am exposing this to remove the need to keep track of a separate
> list
> > > > of available drm_device in the system (to remove the registering and
> > > > unregistering of drm_device to the cgroup subsystem and just use
> > > > drm_minor as the single source of truth.)  I am only filtering out
> the
> > > > render nodes minor because they point to the same drm_device and is
> > > > confusing.
> > > >
> > > > Perhaps I missed an obvious way to list the drm devices without
> > > > iterating through the drm_minors?  (I probably jumped to the minors
> > > > because $major:$minor is the convention to address devices in
> cgroup.)
> > >
> > > Create your own if there's nothing, because you need to anyway:
> > > - You need special locking anyway, we can't just block on the idr lock
> > > for everything.
> > > - This needs to refcount drm_device, no the minors.
> > >
> > > Iterating over stuff still feels kinda wrong still, because normally
> > > the way we register/unregister userspace api (and cgroups isn't
> > > anything else from a drm driver pov) is by adding more calls to
> > > drm_dev_register/unregister. If you put a drm_cg_register/unregister
> > > call in there we have a clean separation, and you can track all the
> > > currently active devices however you want. Iterating over objects that
> > > can be hotunplugged any time tends to get really complicated really
> > > quickly.
> >
> > Um... I thought this is what I had previously.  Did I misunderstood
> > your feedback from v3?  Doesn't drm_minor already include all these
> > facilities so isn't creating my own kind of reinventing the wheel?
> > (as I did previously?)  drm_minor_register is called inside
> > drm_dev_register so isn't leveraging existing drm_minor facilities
> > much better solution?
>
> Hm the previous version already dropped out of my inbox, so hard to find
> it again. And I couldn't find this in archieves. Do you have pointers?
>
> I thought the previous version did cgroup init separately from drm_device
> setup, and I guess I suggested that it should be moved int
> drm_dev_register/unregister?
>
> Anyway, I don't think reusing the drm_minor registration makes sense,
> since we want to be on the drm_device, not on the minor. Which is a bit
> awkward for cgroups, which wants to identify devices using major.minor
> pairs. But I guess drm is the first subsystem where 1 device can be
> exposed through multiple minors ...
>
> Tejun, any suggestions on this?
>
> Anyway, I think just leveraging existing code because it can be abused to
> make it fit for us doesn't make sense. E.g. for the kms side we also don't
> piggy-back on top of drm_minor_register (it would be technically
> possible), but instead we have drm_modeset_register_all().
> -Daniel
>
> >
> > Kenny
> >
> > >
> > >
> > > >
> > > > Kenny
> > > >
> > > > > -Daniel
> > > > >
> > > > > > ---
> > > > > >  drivers/gpu/drm/drm_drv.c  | 19 +++
> > > > > >  drivers/gpu/drm/drm_internal.h |  4 
> > > > > >  include

Re: [PATCH RFC v4 01/16] drm: Add drm_minor_for_each

2019-09-05 Thread Kenny Ho
(resent in plain text mode)

Hi Daniel,

This is the previous patch relevant to this discussion:
https://patchwork.freedesktop.org/patch/314343/

So before I refactored the code to leverage drm_minor, I kept my own
list of "known" drm_device inside the controller and have explicit
register and unregister function to init per device cgroup defaults.
For v4, I refactored the per device cgroup properties and embedded
them into the drm_device and continue to only use the primary minor as
a way to index the device as v3.

Regards,
Kenny


On Wed, Sep 4, 2019 at 4:54 AM Daniel Vetter  wrote:
>
> On Tue, Sep 03, 2019 at 04:43:45PM -0400, Kenny Ho wrote:
> > On Tue, Sep 3, 2019 at 4:12 PM Daniel Vetter  wrote:
> > > On Tue, Sep 3, 2019 at 9:45 PM Kenny Ho  wrote:
> > > > On Tue, Sep 3, 2019 at 3:57 AM Daniel Vetter  wrote:
> > > > > Iterating over minors for cgroups sounds very, very wrong. Why do we 
> > > > > care
> > > > > whether a buffer was allocated through kms dumb vs render nodes?
> > > > >
> > > > > I'd expect all the cgroup stuff to only work on drm_device, if it does
> > > > > care about devices.
> > > > >
> > > > > (I didn't look through the patch series to find out where exactly 
> > > > > you're
> > > > > using this, so maybe I'm off the rails here).
> > > >
> > > > I am exposing this to remove the need to keep track of a separate list
> > > > of available drm_device in the system (to remove the registering and
> > > > unregistering of drm_device to the cgroup subsystem and just use
> > > > drm_minor as the single source of truth.)  I am only filtering out the
> > > > render nodes minor because they point to the same drm_device and is
> > > > confusing.
> > > >
> > > > Perhaps I missed an obvious way to list the drm devices without
> > > > iterating through the drm_minors?  (I probably jumped to the minors
> > > > because $major:$minor is the convention to address devices in cgroup.)
> > >
> > > Create your own if there's nothing, because you need to anyway:
> > > - You need special locking anyway, we can't just block on the idr lock
> > > for everything.
> > > - This needs to refcount drm_device, no the minors.
> > >
> > > Iterating over stuff still feels kinda wrong still, because normally
> > > the way we register/unregister userspace api (and cgroups isn't
> > > anything else from a drm driver pov) is by adding more calls to
> > > drm_dev_register/unregister. If you put a drm_cg_register/unregister
> > > call in there we have a clean separation, and you can track all the
> > > currently active devices however you want. Iterating over objects that
> > > can be hotunplugged any time tends to get really complicated really
> > > quickly.
> >
> > Um... I thought this is what I had previously.  Did I misunderstood
> > your feedback from v3?  Doesn't drm_minor already include all these
> > facilities so isn't creating my own kind of reinventing the wheel?
> > (as I did previously?)  drm_minor_register is called inside
> > drm_dev_register so isn't leveraging existing drm_minor facilities
> > much better solution?
>
> Hm the previous version already dropped out of my inbox, so hard to find
> it again. And I couldn't find this in archieves. Do you have pointers?
>
> I thought the previous version did cgroup init separately from drm_device
> setup, and I guess I suggested that it should be moved int
> drm_dev_register/unregister?
>
> Anyway, I don't think reusing the drm_minor registration makes sense,
> since we want to be on the drm_device, not on the minor. Which is a bit
> awkward for cgroups, which wants to identify devices using major.minor
> pairs. But I guess drm is the first subsystem where 1 device can be
> exposed through multiple minors ...
>
> Tejun, any suggestions on this?
>
> Anyway, I think just leveraging existing code because it can be abused to
> make it fit for us doesn't make sense. E.g. for the kms side we also don't
> piggy-back on top of drm_minor_register (it would be technically
> possible), but instead we have drm_modeset_register_all().
> -Daniel
>
> >
> > Kenny
> >
> > >
> > >
> > > >
> > > > Kenny
> > > >
> > > > > -Daniel
> > > > >
> > > > > > ---
> > > > > >  drivers/gpu/drm/drm_drv.c  | 19 +++
> > > > > >  drivers/gpu/dr

Re: [PATCH RFC v4 01/16] drm: Add drm_minor_for_each

2019-09-05 Thread Kenny Ho
On Thu, Sep 5, 2019 at 4:32 PM Daniel Vetter  wrote:
>
*snip*
> drm_dev_unregister gets called on hotunplug, so your cgroup-internal
> tracking won't get out of sync any more than the drm_minor list gets
> out of sync with drm_devices. The trouble with drm_minor is just that
> cgroup doesn't track allocations on drm_minor (that's just the uapi
> flavour), but on the underlying drm_device. So really doesn't make
> much sense to attach cgroup tracking to the drm_minor.

Um... I think I get what you are saying, but isn't this a matter of
the cgroup controller doing a drm_dev_get when using the drm_minor?
Or would that not work because it's possible to have a valid drm_minor
but an invalid drm_device in it?  I understand it's an extra level of
indirection, but since the convention for addressing devices in cgroup
is $major:$minor, I don't see a way to escape this.  (Tejun
actually already made a comment on my earlier RFC where I didn't
follow the major:minor convention strictly.)

Kenny

> > > Just doing a drm_cg_register/unregister pair that's called from
> > > drm_dev_register/unregister, and then if you want, looking up the
> > > right minor (I think always picking the render node makes sense for
> > > this, and skipping if there's no render node) would make most sense.
> > > At least for the basic cgroup controllers which are generic across
> > > drivers.
> >
> > Why do we want to skip drm devices that does not have a render node
> > and not just use the primary instead?
>
> I guess we could also take the primary node, but drivers with only
> primary node are generaly display-only drm drivers. Not sure we want
> cgroups on those (but I guess can't hurt, and more consistent). But
> then we'd always need to pick the primary node for cgroup
> identification purposes.
> -Daniel
>
> >
> > Kenny
> >
> >
> >
> > > -Daniel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH RFC v4 16/16] drm/amdgpu: Integrate with DRM cgroup

2019-12-03 Thread Kenny Ho
Hey Joe,

I don't have all the answers right now but one thing I want to mention
is that, with cgroup, there's always a possibility for a user
configuration that leads to under-resourcing the application.  Your
comments certainly highlight the need to make under-resource
situations obvious to debug.  (I want to write this down so I don't
forget also... :) I should probably emit some dmesg for situations like
this.)  Thanks!

Regards,
Kenny

On Mon, Dec 2, 2019 at 5:05 PM Greathouse, Joseph
 wrote:
>
> > -Original Message-
> > From: Kenny Ho 
> > Sent: Friday, November 29, 2019 12:00 AM
> >
> > Reducing audience since this is AMD specific.
> >
> > On Tue, Oct 8, 2019 at 3:11 PM Kuehling, Felix  
> > wrote:
> > >
> > > On 2019-08-29 2:05 a.m., Kenny Ho wrote:
> > > > The number of logical gpu (lgpu) is defined to be the number of
> > > > compute unit (CU) for a device.  The lgpu allocation limit only
> > > > applies to compute workload for the moment (enforced via kfd queue
> > > > creation.)  Any cu_mask update is validated against the availability
> > > > of the compute unit as defined by the drmcg the kfd process belongs to.
> > >
> > > There is something missing here. There is an API for the application
> > > to specify a CU mask. Right now it looks like the
> > > application-specified and CGroup-specified CU masks would clobber each
> > > other. Instead the two should be merged.
> > >
> > > The CGroup-specified mask should specify a subset of CUs available for
> > > application-specified CU masks. When the cgroup CU mask changes, you'd
> > > need to take any application-specified CU masks into account before
> > > updating the hardware.
> > The idea behind the current implementation is to give sysadmin priority 
> > over user application (as that is the definition of control
> > group.)  Mask specified by applicatoin/user is validated by 
> > pqm_drmcg_lgpu_validate and rejected with EACCES if they are not
> > compatible.  The alternative is to ignore the difference and have the 
> > kernel guess/redistribute the assignment but I am not sure if this
> > is a good approach since there is not enough information to allow the 
> > kernel to guess the user's intention correctly consistently.  (This
> > is base on multiple conversations with you and Joe that, led me to believe, 
> > there are situation where spreading CU assignment across
> > multiple SE is a good thing but not always.)
> >
> > If the cgroup-specified mask is changed after the application has set the 
> > mask, the intersection of the two masks will be set instead.  It
> > is possible to have no intersection and in this case no CU is made 
> > available to the application (just like the possibility for memcgroup to
> > starve the amount of memory needed by an application.)
>
> I don't disagree with forcing a user to work within an lgpu's allocation. But 
> there's two minor problems here:
>
> 1) we will need a way for the process to query what the lgpu's bitmap looks 
> like. You and Felix are somewhat discussing this below, but I don't think the 
> KFD's "number of CUs" topology information is sufficient. I can know I have 
> 32 CUs, but I don't know which 32 bits in the bitmask are turned on. But your 
> code in pqm_drmcg_lgpu_validate() requires a subset when setting  CU mask on 
> an lgpu. A user needs to know what bits are on in the LGPU for this to work.
> 2) Even if we have a query API, do we have an easy way to prevent a data 
> race? Do we care? For instance, if I query the existing lgpu bitmap, then try 
> to set a CU mask on a subset of that, it's possible that the lgpu will change 
> between the query and set. That would make the setting fail, maybe that's 
> good enough (you can just try in a loop until it succeeds?)
>
> Do empty CU masks actually work? This seems like something we would want to 
> avoid. This could happen not infrequently if someone does something like:
> * lgpu with half the CUs enabled
> * User sets a mask to use half of those CUs
> * lgpu is changed to enable the other half of the CUS --> now the user's mask 
> is fully destroyed and everything dies. :\
>
> > > The KFD topology APIs report the number of available CUs to the
> > > application. CGroups would change that number at runtime and
> > > applications would not expect that. I think the best way to deal with
> > > that would be to have multiple bits in the application-specified CU
> > > mask map to the same CU. How to do that in a fair way is not obvious.
> > > I guess a more coarse-grain div

Re: [PATCH RFC v4 02/16] cgroup: Introduce cgroup for drm subsystem

2019-11-28 Thread Kenny Ho
On Tue, Oct 1, 2019 at 10:31 AM Michal Koutný  wrote:
> On Thu, Aug 29, 2019 at 02:05:19AM -0400, Kenny Ho  wrote:
> > +struct cgroup_subsys drm_cgrp_subsys = {
> > + .css_alloc  = drmcg_css_alloc,
> > + .css_free   = drmcg_css_free,
> > + .early_init = false,
> > + .legacy_cftypes = files,
> Do you really want to expose the DRM controller on v1 hierarchies (where
> threads of one process can be in different cgroups, or children cgroups
> compete with their parents)?

(Sorry for the delay, I have been distracted by something else.)
Yes, I am hoping to make the functionality as widely available as
possible since the ecosystem is still transitioning to v2.  Do you see
an inherent problem with this approach?

Regards,
Kenny


>
> > + .dfl_cftypes= files,
> > +};
>
> Just asking,
> Michal
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH RFC v4 16/16] drm/amdgpu: Integrate with DRM cgroup

2019-11-28 Thread Kenny Ho
Reducing audience since this is AMD specific.

On Tue, Oct 8, 2019 at 3:11 PM Kuehling, Felix  wrote:
>
> On 2019-08-29 2:05 a.m., Kenny Ho wrote:
> > The number of logical gpu (lgpu) is defined to be the number of compute
> > unit (CU) for a device.  The lgpu allocation limit only applies to
> > compute workload for the moment (enforced via kfd queue creation.)  Any
> > cu_mask update is validated against the availability of the compute unit
> > as defined by the drmcg the kfd process belongs to.
>
> There is something missing here. There is an API for the application to
> specify a CU mask. Right now it looks like the application-specified and
> CGroup-specified CU masks would clobber each other. Instead the two
> should be merged.
>
> The CGroup-specified mask should specify a subset of CUs available for
> application-specified CU masks. When the cgroup CU mask changes, you'd
> need to take any application-specified CU masks into account before
> updating the hardware.
The idea behind the current implementation is to give the sysadmin
priority over the user application (as that is the definition of a
control group.)  A mask specified by the application/user is validated
by pqm_drmcg_lgpu_validate and rejected with EACCES if it is not
compatible.  The alternative is to ignore the difference and have the
kernel guess/redistribute the assignment, but I am not sure if this is
a good approach since there is not enough information to allow the
kernel to guess the user's intention correctly and consistently.
(This is based on multiple conversations with you and Joe that led me
to believe there are situations where spreading CU assignment across
multiple SEs is a good thing, but not always.)

If the cgroup-specified mask is changed after the application has set
its mask, the intersection of the two masks will be set instead.  It
is possible to have no intersection, and in this case no CU is made
available to the application (just like the possibility for memcgroup
to starve the amount of memory needed by an application.)
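
Conceptually, the merge works like the following sketch (illustrative
only, not the actual pqm_drmcg_lgpu_validate code; MAX_CU is an assumed
mask width):

	/* effective mask = application request constrained by the cgroup */
	DECLARE_BITMAP(effective, MAX_CU);

	bitmap_and(effective, app_mask, cgroup_mask, MAX_CU);
	if (bitmap_empty(effective, MAX_CU)) {
		/* no intersection: the process is left with no CUs,
		 * analogous to memcg starving an application of memory */
	}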

> The KFD topology APIs report the number of available CUs to the
> application. CGroups would change that number at runtime and
> applications would not expect that. I think the best way to deal with
> that would be to have multiple bits in the application-specified CU mask
> map to the same CU. How to do that in a fair way is not obvious. I guess
> a more coarse-grain division of the GPU into LGPUs would make this
> somewhat easier.
Another possibility is to add a namespace to the topology sysfs such
that the reported number of CUs changes accordingly, although that
wouldn't give the user the available mask that this implementation
exposes via the cgroup sysfs.  Yet another possibility is to modify
the thunk, similar to what was done for the device cgroup (device
re-mapping.)

> How is this problem handled for CPU cores and the interaction with CPU
> pthread_setaffinity_np?
Per the documentation of pthread_setaffinity_np, "If the call is
successful, and the thread is not currently running on one of the CPUs
in cpuset, then it is migrated to one of those CPUs."
http://man7.org/linux/man-pages/man3/pthread_setaffinity_np.3.html

Regards,
Kenny



> Regards,
>Felix
>
>
> >
> > Change-Id: I69a57452c549173a1cd623c30dc57195b3b6563e
> > Signed-off-by: Kenny Ho 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   4 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  21 +++
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |   6 +
> >   drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 +
> >   .../amd/amdkfd/kfd_process_queue_manager.c| 140 ++
> >   5 files changed, 174 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > index 55cb1b2094fd..369915337213 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> > @@ -198,6 +198,10 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct 
> > kgd_dev *dst, struct kgd_dev *s
> >   valid;  \
> >   })
> >
> > +int amdgpu_amdkfd_update_cu_mask_for_process(struct task_struct *task,
> > + struct amdgpu_device *adev, unsigned long *lgpu_bitmap,
> > + unsigned int nbits);
> > +
> >   /* GPUVM API */
> >   int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned 
> > int pasid,
> >   void **vm, void **process_info,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index 163a4fbf0611..8abeffdd2e5b 100644
>

Re: [PATCH RFC v4 07/16] drm, cgroup: Add total GEM buffer allocation limit

2019-11-28 Thread Kenny Ho
On Tue, Oct 1, 2019 at 10:30 AM Michal Koutný  wrote:
> On Thu, Aug 29, 2019 at 02:05:24AM -0400, Kenny Ho  wrote:
> > drm.buffer.default
> > A read-only flat-keyed file which exists on the root cgroup.
> > Each entry is keyed by the drm device's major:minor.
> >
> > Default limits on the total GEM buffer allocation in bytes.
> What is the purpose of this attribute (and alikes for other resources)?
> I can't see it being set differently but S64_MAX in
> drmcg_device_early_init.

cgroup has a number of conventions, one of which is the idea of a
default.  The idea here is to allow for device-specific defaults.  For
this specific resource, I can probably avoid exposing it since it's not
particularly useful, but for other resources (such as the lgpu
resource) the concept of a default is useful (for example, different
devices can have different numbers of lgpus.)


> > +static ssize_t drmcg_limit_write(struct kernfs_open_file *of, char *buf,
> > [...]
> > + switch (type) {
> > + case DRMCG_TYPE_BO_TOTAL:
> > + p_max = parent == NULL ? S64_MAX :
> > + parent->dev_resources[minor]->
> > + bo_limits_total_allocated;
> > +
> > + rc = drmcg_process_limit_s64_val(sattr, true,
> > + props->bo_limits_total_allocated_default,
> > + p_max,
> > + );
> IIUC, this allows initiating the particular limit value based either on
> parent or the default per-device value. This is alas rather an
> antipattern. The most stringent limit on the path from a cgroup to the
> root should be applied at the charging time. However, the child should
> not inherit the verbatim value from the parent (may race with parent and
> it won't be updated upon parent change).
I think this was a mistake during one of my refactors and I shrunk the
critical section protected by a mutex a bit too much.  But you are
right in the sense that I don't propagate the limits downward to the
children when the parent's limit is updated.  From the user interface
perspective, though, wouldn't this be confusing?  When a sysadmin sets
a limit using the 'max' keyword, the value would be a global one even
though the actual allowable maximum for the particular cgroup is less
in reality because of the ancestor cgroups.  (If this is the
established norm, I am ok to go along, but it seems confusing to me.)
I am probably missing something because, as I implemented this, the
'max' and 'default' semantics have been confusing to me, especially for
the children cgroups due to the context of the ancestors.

> You already do the appropriate hierarchical check in
> drmcg_try_chb_bo_alloc, so the parent propagation could be simply
> dropped if I'm not mistaken.
I will need to double check.  But I think interaction between parent
and children (or perhaps between siblings) will be needed eventually
because there seems to be a desire to implement a "weight" type of
resource.  Also, from a performance perspective, wouldn't it make more
sense to make sure the limits are set correctly during configuration
than to have to check all the cgroups up through the parents?  I don't
have comprehensive knowledge of the implementation of other cgroup
controllers, so if more experienced folks can comment that would be
great.  (Although I probably should just do one approach instead of
doing both... or 1.5.)

>
> Also, I can't find how the read of
> parent->dev_resources[minor]->bo_limits_total_allocated and its
> concurrent update are synchronized (i.e. someone writing
> buffer.total.max for parent and child in parallel). (It may just my
> oversight.)
This is probably the refactor mistake I mentioned earlier.

Regards,
Kenny
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH RFC v4 14/16] drm, cgroup: Introduce lgpu as DRM cgroup resource

2019-10-09 Thread Kenny Ho
Hi Daniel,

Can you elaborate on what you mean in more detail?  The goal of lgpu is
to provide the ability to subdivide a GPU device and give those slices
to different users as needed.  I don't think there is anything
controversial or vendor specific here, as requests for this are well
documented.  The underlying representation is just a bitmap, which is
neither unprecedented nor vendor specific (a bitmap is used in cpuset,
for instance.)

An implementation of this abstraction is not hardware specific either.
For example, one can associate a virtual function in SRIOV with an lgpu.
Alternatively, a device can also declare that it has 100 lgpus and treat
the lgpu quantity as a percentage representation of GPU subdivision.
The fact that an abstraction works well with a vendor implementation
does not make it a "prettification" of a vendor feature (by this
logic, I hope you are not implying an abstraction is only valid if it
does not work with AMD CU masking, because that seems fairly partisan.)

Did I misread your characterization of this patch?

Regards,
Kenny


On Wed, Oct 9, 2019 at 6:31 AM Daniel Vetter  wrote:
>
> On Tue, Oct 08, 2019 at 06:53:18PM +, Kuehling, Felix wrote:
> > On 2019-08-29 2:05 a.m., Kenny Ho wrote:
> > > drm.lgpu
> > >  A read-write nested-keyed file which exists on all cgroups.
> > >  Each entry is keyed by the DRM device's major:minor.
> > >
> > >  lgpu stands for logical GPU, it is an abstraction used to
> > >  subdivide a physical DRM device for the purpose of resource
> > >  management.
> > >
> > >  The lgpu is a discrete quantity that is device specific (i.e.
> > >  some DRM devices may have 64 lgpus while others may have 100
> > >  lgpus.)  The lgpu is a single quantity with two representations
> > >  denoted by the following nested keys.
> > >
> > >= 
> > >count Representing lgpu as anonymous resource
> > >list  Representing lgpu as named resource
> > >= 
> > >
> > >  For example:
> > >  226:0 count=256 list=0-255
> > >  226:1 count=4 list=0,2,4,6
> > >  226:2 count=32 list=32-63
> > >
> > >  lgpu is represented by a bitmap and uses the bitmap_parselist
> > >  kernel function so the list key input format is a
> > >  comma-separated list of decimal numbers and ranges.
> > >
> > >  Consecutively set bits are shown as two hyphen-separated decimal
> > >  numbers, the smallest and largest bit numbers set in the range.
> > >  Optionally each range can be postfixed to denote that only parts
> > >  of it should be set.  The range will divided to groups of
> > >  specific size.
> > >  Syntax: range:used_size/group_size
> > >  Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769
> > >
> > >  The count key is the hamming weight / hweight of the bitmap.
> > >
> > >  Both count and list accept the max and default keywords.
> > >
> > >      Some DRM devices may only support lgpu as anonymous resources.
> > >  In such case, the significance of the position of the set bits
> > >  in list will be ignored.
> > >
> > >  This lgpu resource supports the 'allocation' resource
> > >  distribution model.
> > >
> > > Change-Id: I1afcacf356770930c7f925df043e51ad06ceb98e
> > > Signed-off-by: Kenny Ho 
> >
> > The description sounds reasonable to me and maps well to the CU masking
> > feature in our GPUs.
> >
> > It would also allow us to do more coarse-grained masking for example to
> > guarantee balanced allocation of CUs across shader engines or
> > partitioning of memory bandwidth or CP pipes (if that is supported by
> > the hardware/firmware).
>
> Hm, so this sounds like the definition for how this cgroup is supposed to
> work is "amd CU masking" (whatever that exactly is). And the abstract
> description is just prettification on top, but not actually the real
> definition you guys want.
>
> I think adding a cgroup which is that much depending upon the hw
> implementation of the first driver supporting it is not a good idea.
> -Daniel
>
> >
> > I can't comment on the code as I'm unfamiliar with the details of the
> > cgroup code.
> >
> > Acked-by: Felix Kuehling 

[PATCH v2 08/11] drm, cgroup: Add peak GEM buffer allocation limit

2020-02-26 Thread Kenny Ho
gpu.buffer.peak.default
A read-only flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Default limits on the largest GEM buffer allocation in bytes.

gpu.buffer.peak.max
A read-write flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Per device limits on the largest GEM buffer allocation in bytes.
This is a hard limit.  Attempts to allocate beyond the cgroup
limit will result in ENOMEM.  Shorthand understood by memparse
(such as k, m, g) can be used.

Set largest allocation for /dev/dri/card1 to 4MB
echo "226:1 4m" > gpu.buffer.peak.max

Change-Id: I5ab3fb4a442b6cbd5db346be595897c90217da69
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst | 18 +++
 include/drm/drm_cgroup.h|  1 +
 include/linux/cgroup_drm.h  |  1 +
 kernel/cgroup/drm.c | 43 +
 4 files changed, 63 insertions(+)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index f2d7abf5c783..581343472651 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2108,6 +2108,24 @@ GPU Interface Files
Set allocation limit for /dev/dri/card0 to 512MB
echo "226:0 512m" > gpu.buffer.total.max
 
+  gpu.buffer.peak.default
+   A read-only flat-keyed file which exists on the root cgroup.
+   Each entry is keyed by the drm device's major:minor.
+
+   Default limits on the largest GEM buffer allocation in bytes.
+
+  gpu.buffer.peak.max
+   A read-write flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Per device limits on the largest GEM buffer allocation in bytes.
+   This is a hard limit.  Attempts to allocate beyond the cgroup
+   limit will result in ENOMEM.  Shorthand understood by memparse
+   (such as k, m, g) can be used.
+
+   Set largest allocation for /dev/dri/card1 to 4MB
+   echo "226:1 4m" > gpu.buffer.peak.max
+
 GEM Buffer Ownership
 
 
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 2783e56690db..2b41d4d22e33 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -16,6 +16,7 @@ struct drmcg_props {
boollimit_enforced;
 
s64 bo_limits_total_allocated_default;
+   s64 bo_limits_peak_allocated_default;
 };
 
 void drmcg_bind(struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 71023654fb77..aba3b26718c0 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -29,6 +29,7 @@ struct drmcg_device_resource {
s64 bo_limits_total_allocated;
 
s64 bo_stats_peak_allocated;
+   s64 bo_limits_peak_allocated;
 
s64 bo_stats_count_allocated;
 };
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 4b19e533941d..62d2a9d33d0c 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -95,6 +95,9 @@ static inline int init_drmcg_single(struct drmcg *drmcg, 
struct drm_device *dev)
ddr->bo_limits_total_allocated =
dev->drmcg_props.bo_limits_total_allocated_default;
 
+   ddr->bo_limits_peak_allocated =
+   dev->drmcg_props.bo_limits_peak_allocated_default;
+
return 0;
 }
 
@@ -305,6 +308,9 @@ static void drmcg_print_limits(struct drmcg_device_resource 
*ddr,
case DRMCG_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", ddr->bo_limits_total_allocated);
break;
+   case DRMCG_TYPE_BO_PEAK:
+   seq_printf(sf, "%lld\n", ddr->bo_limits_peak_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -319,6 +325,10 @@ static void drmcg_print_default(struct drmcg_props *props,
seq_printf(sf, "%lld\n",
props->bo_limits_total_allocated_default);
break;
+   case DRMCG_TYPE_BO_PEAK:
+   seq_printf(sf, "%lld\n",
+   props->bo_limits_peak_allocated_default);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -476,6 +486,19 @@ static ssize_t drmcg_limit_write(struct kernfs_open_file 
*of, char *buf,
 
ddr->bo_limits_total_allocated = val;
break;
+   case DRMCG_TYPE_BO_PEAK:
+   rc = drmcg_process_limit_s64_val

[PATCH v2 11/11] drm/amdgpu: Integrate with DRM cgroup

2020-02-26 Thread Kenny Ho
The number of compute units (CU) for a device is used for the gpu cgroup
compute capacity.  The gpu cgroup compute allocation limit only applies
to compute workloads for the moment (enforced via kfd queue creation.)
Any cu_mask update is validated against the availability of the compute
units as defined by the drmcg the kfd process belongs to.
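
As a rough illustration of that validation, here is a minimal sketch
(not the code added by this patch): it assumes the requested mask must
be a subset of the cgroup's effective compute bitmap, reuses the
drmcg_get/drmcg_put helpers and the compute_eff field introduced
earlier in the series, and omits the locking and per-queue bookkeeping
the real pqm_drmcg_compute_validate needs.

static bool cu_mask_allowed_sketch(struct task_struct *task,
				   struct drm_device *dev,
				   unsigned long *requested,
				   unsigned int nbits)
{
	struct drmcg *drmcg = drmcg_get(task);
	struct drmcg_device_resource *ddr =
		drmcg->dev_resources[dev->primary->index];
	bool ok;

	/* permitted only if every requested CU is also granted to the cgroup */
	ok = bitmap_subset(requested, ddr->compute_eff, nbits);

	drmcg_put(drmcg);
	return ok;
}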

Change-Id: I2930e76ef9ac6d36d0feb81f604c89a4208e6614
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  29 
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |   7 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 +
 .../amd/amdkfd/kfd_process_queue_manager.c| 153 ++
 5 files changed, 196 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0ee8aae6c519..1efbc0d3c03e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -199,6 +199,10 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev 
*dst, struct kgd_dev *s
valid;  \
})
 
+int amdgpu_amdkfd_update_cu_mask_for_process(struct task_struct *task,
+   struct amdgpu_device *adev, unsigned long *compute_bm,
+   unsigned int compute_bm_size);
+
 /* GPUVM API */
 int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int 
pasid,
void **vm, void **process_info,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 171397708855..595ad852080b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1418,9 +1418,31 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, 
unsigned int pipe,
 static void amdgpu_drmcg_custom_init(struct drm_device *dev,
struct drmcg_props *props)
 {
+   struct amdgpu_device *adev = dev->dev_private;
+
+   props->compute_capacity = adev->gfx.cu_info.number;
+   bitmap_zero(props->compute_slots, MAX_DRMCG_COMPUTE_CAPACITY);
+   bitmap_fill(props->compute_slots, props->compute_capacity);
+
props->limit_enforced = true;
 }
 
+static void amdgpu_drmcg_limit_updated(struct drm_device *dev,
+   struct task_struct *task, struct drmcg_device_resource *ddr,
+   enum drmcg_res_type res_type)
+{
+   struct amdgpu_device *adev = dev->dev_private;
+
+   switch (res_type) {
+   case DRMCG_TYPE_COMPUTE:
+   amdgpu_amdkfd_update_cu_mask_for_process(task, adev,
+ddr->compute_eff, dev->drmcg_props.compute_capacity);
+   break;
+   default:
+   break;
+   }
+}
+
 #else
 
 static void amdgpu_drmcg_custom_init(struct drm_device *dev,
@@ -1428,6 +1450,12 @@ static void amdgpu_drmcg_custom_init(struct drm_device 
*dev,
 {
 }
 
+static void amdgpu_drmcg_limit_updated(struct drm_device *dev,
+   struct task_struct *task, struct drmcg_device_resource *ddr,
+   enum drmcg_res_type res_type)
+{
+}
+
 #endif /* CONFIG_CGROUP_DRM */
 
 static struct drm_driver kms_driver = {
@@ -1462,6 +1490,7 @@ static struct drm_driver kms_driver = {
.gem_prime_mmap = amdgpu_gem_prime_mmap,
 
.drmcg_custom_init = amdgpu_drmcg_custom_init,
+   .drmcg_limit_updated = amdgpu_drmcg_limit_updated,
 
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 675735b8243a..a35596f2dc4e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -451,6 +451,13 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct 
kfd_process *p,
return -EFAULT;
}
 
+   if (!pqm_drmcg_compute_validate(p, args->queue_id,
+properties.cu_mask, cu_mask_size)) {
+   pr_debug("CU mask not permitted by DRM Cgroup");
+   kfree(properties.cu_mask);
+   return -EACCES;
+   }
+
mutex_lock(&p->mutex);
 
retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 063096ec832d..0fb619586e24 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -929,6 +929,9 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
   u32 *ctl_stack_used_size,
   u32 *save_area_used_size);
 
+bool pqm_drmcg_compute_validate(struct kfd_process *p, int qid, u32 *cu_mask,
+   unsigned int cu_mask_size);
+
 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
  unsigned int fence_value,
  unsign

[PATCH v2 06/11] drm, cgroup: Add GEM buffer allocation count stats

2020-02-26 Thread Kenny Ho
gpu.buffer.count.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Total number of GEM buffer allocated.

Change-Id: Iad29bdf44390dbcee07b1e72ea0ff811aa3b9dcd
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst |  6 ++
 include/linux/cgroup_drm.h  |  3 +++
 kernel/cgroup/drm.c | 22 +++---
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 6199cc9a978f..065f2b52da57 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2081,6 +2081,12 @@ GPU Interface Files
 
Largest (high water mark) GEM buffer allocated in bytes.
 
+  gpu.buffer.count.stats
+   A read-only flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Total number of GEM buffer allocated.
+
 GEM Buffer Ownership
 
 
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index d90807627213..103868d972d0 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -14,6 +14,7 @@
 enum drmcg_res_type {
DRMCG_TYPE_BO_TOTAL,
DRMCG_TYPE_BO_PEAK,
+   DRMCG_TYPE_BO_COUNT,
__DRMCG_TYPE_LAST,
 };
 
@@ -27,6 +28,8 @@ struct drmcg_device_resource {
s64 bo_stats_total_allocated;
 
s64 bo_stats_peak_allocated;
+
+   s64 bo_stats_count_allocated;
 };
 
 /**
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 68b23693418b..5a700833a304 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -280,6 +280,9 @@ static void drmcg_print_stats(struct drmcg_device_resource 
*ddr,
case DRMCG_TYPE_BO_PEAK:
seq_printf(sf, "%lld\n", ddr->bo_stats_peak_allocated);
break;
+   case DRMCG_TYPE_BO_COUNT:
+   seq_printf(sf, "%lld\n", ddr->bo_stats_count_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -334,6 +337,12 @@ struct cftype files[] = {
.private = DRMCG_CTF_PRIV(DRMCG_TYPE_BO_PEAK,
DRMCG_FTYPE_STATS),
},
+   {
+   .name = "buffer.count.stats",
+   .seq_show = drmcg_seq_show,
+   .private = DRMCG_CTF_PRIV(DRMCG_TYPE_BO_COUNT,
+   DRMCG_FTYPE_STATS),
+   },
{ } /* terminate */
 };
 
@@ -385,6 +394,8 @@ void drmcg_chg_bo_alloc(struct drmcg *drmcg, struct 
drm_device *dev,
 
if (ddr->bo_stats_peak_allocated < (s64)size)
ddr->bo_stats_peak_allocated = (s64)size;
+
+   ddr->bo_stats_count_allocated++;
}
mutex_unlock(&dev->drmcg_mutex);
 }
@@ -402,15 +413,20 @@ EXPORT_SYMBOL(drmcg_chg_bo_alloc);
 void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
size_t size)
 {
+   struct drmcg_device_resource *ddr;
int devIdx = dev->primary->index;
 
if (drmcg == NULL)
return;
 
mutex_lock(&dev->drmcg_mutex);
-   for ( ; drmcg != NULL; drmcg = drmcg_parent(drmcg))
-   drmcg->dev_resources[devIdx]->bo_stats_total_allocated
-   -= (s64)size;
+   for ( ; drmcg != NULL; drmcg = drmcg_parent(drmcg)) {
+   ddr = drmcg->dev_resources[devIdx];
+
+   ddr->bo_stats_total_allocated -= (s64)size;
+
+   ddr->bo_stats_count_allocated--;
+   }
mutex_unlock(&dev->drmcg_mutex);
 }
 EXPORT_SYMBOL(drmcg_unchg_bo_alloc);
-- 
2.25.0



[PATCH v2 07/11] drm, cgroup: Add total GEM buffer allocation limit

2020-02-26 Thread Kenny Ho
The drm resource being limited here is the GEM buffer objects.  User
applications allocate and free these buffers.  In addition, a process
can allocate a buffer and share it with another process.  The consumer
of a shared buffer can also outlive the allocator of the buffer.

For the purpose of cgroup accounting and limiting, ownership of the
buffer is deemed to be the cgroup to which the allocating process
belongs.  There is one cgroup limit per drm device.

The limiting functionality is added to the previous stats collection
function.  The drm_gem_private_object_init is modified to have a return
value to allow failure due to cgroup limit.

The try_chg function only fails if the DRM device's cgroup properties
have limit_enforced set to true.  This allows the DRM cgroup controller
to collect usage stats without enforcing the limits.
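
To make that rule concrete, here is a condensed sketch of what the
try-charge could look like.  This is illustrative only, not the exact
helper added by this patch: it borrows the field and helper names from
the earlier patches in the series and leaves out the drmcg_mutex
locking as well as the peak and count bookkeeping.

static int try_chg_bo_alloc_sketch(struct drmcg *drmcg,
				   struct drm_device *dev, size_t size)
{
	int devIdx = dev->primary->index;
	struct drmcg_device_resource *ddr;
	struct drmcg *cg;

	/* first pass: refuse if any ancestor would go over its hard limit */
	for (cg = drmcg; cg != NULL; cg = drmcg_parent(cg)) {
		ddr = cg->dev_resources[devIdx];

		if (dev->drmcg_props.limit_enforced &&
		    ddr->bo_stats_total_allocated + (s64)size >
				ddr->bo_limits_total_allocated)
			return -ENOMEM;
	}

	/* second pass: charge the cgroup and all of its ancestors */
	for (cg = drmcg; cg != NULL; cg = drmcg_parent(cg))
		cg->dev_resources[devIdx]->bo_stats_total_allocated += (s64)size;

	return 0;
}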

gpu.buffer.default
A read-only flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Default limits on the total GEM buffer allocation in bytes.

gpu.buffer.max
A read-write flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Per device limits on the total GEM buffer allocation in bytes.
This is a hard limit.  Attempts to allocate beyond the cgroup
limit will result in ENOMEM.  Shorthand understood by memparse
(such as k, m, g) can be used.

Set allocation limit for /dev/dri/card1 to 1GB
echo "226:1 1g" > gpu.buffer.total.max

Set allocation limit for /dev/dri/card0 to 512MB
echo "226:0 512m" > gpu.buffer.total.max

Change-Id: Id3265bbd0fafe84a16b59617df79bd32196160be
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst|  21 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c|  19 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   6 +-
 drivers/gpu/drm/drm_gem.c  |  11 +-
 include/drm/drm_cgroup.h   |   8 +-
 include/drm/drm_gem.h  |   2 +-
 include/linux/cgroup_drm.h |   1 +
 kernel/cgroup/drm.c| 227 -
 8 files changed, 278 insertions(+), 17 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 065f2b52da57..f2d7abf5c783 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2087,6 +2087,27 @@ GPU Interface Files
 
Total number of GEM buffer allocated.
 
+  gpu.buffer.default
+   A read-only flat-keyed file which exists on the root cgroup.
+   Each entry is keyed by the drm device's major:minor.
+
+   Default limits on the total GEM buffer allocation in bytes.
+
+  gpu.buffer.max
+   A read-write flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Per device limits on the total GEM buffer allocation in bytes.
+   This is a hard limit.  Attempts to allocate beyond the cgroup
+   limit will result in ENOMEM.  Shorthand understood by memparse
+   (such as k, m, g) can be used.
+
+   Set allocation limit for /dev/dri/card1 to 1GB
+   echo "226:1 1g" > gpu.buffer.total.max
+
+   Set allocation limit for /dev/dri/card0 to 512MB
+   echo "226:0 512m" > gpu.buffer.total.max
+
 GEM Buffer Ownership
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6e1faf8a2bca..171397708855 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1413,6 +1413,23 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, 
unsigned int pipe,
  stime, etime, mode);
 }
 
+#ifdef CONFIG_CGROUP_DRM
+
+static void amdgpu_drmcg_custom_init(struct drm_device *dev,
+   struct drmcg_props *props)
+{
+   props->limit_enforced = true;
+}
+
+#else
+
+static void amdgpu_drmcg_custom_init(struct drm_device *dev,
+   struct drmcg_props *props)
+{
+}
+
+#endif /* CONFIG_CGROUP_DRM */
+
 static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP | DRIVER_ATOMIC |
@@ -1444,6 +1461,8 @@ static struct drm_driver kms_driver = {
.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
.gem_prime_mmap = amdgpu_gem_prime_mmap,
 
+   .drmcg_custom_init = amdgpu_drmcg_custom_init,
+
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
.date = DRIVER_DATE,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5766d20f29d8..4d08ccbc541a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -34,6 +34,7 @@
 
 #include 
 #include 
+#include 
 #include "amdgpu.h"

[PATCH v2 05/11] drm, cgroup: Add peak GEM buffer allocation stats

2020-02-26 Thread Kenny Ho
gpu.buffer.peak.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Largest (high water mark) GEM buffer allocated in bytes.

Change-Id: I40fe4c13c1cea8613b3e04b802f3e1f19eaab4fc
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst |  6 ++
 include/linux/cgroup_drm.h  |  3 +++
 kernel/cgroup/drm.c | 12 
 3 files changed, 21 insertions(+)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index c041e672cc10..6199cc9a978f 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2075,6 +2075,12 @@ GPU Interface Files
 
Total GEM buffer allocation in bytes.
 
+  gpu.buffer.peak.stats
+   A read-only flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Largest (high water mark) GEM buffer allocated in bytes.
+
 GEM Buffer Ownership
 
 
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 34b0aec7c964..d90807627213 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -13,6 +13,7 @@
 
 enum drmcg_res_type {
DRMCG_TYPE_BO_TOTAL,
+   DRMCG_TYPE_BO_PEAK,
__DRMCG_TYPE_LAST,
 };
 
@@ -24,6 +25,8 @@ enum drmcg_res_type {
 struct drmcg_device_resource {
/* for per device stats */
s64 bo_stats_total_allocated;
+
+   s64 bo_stats_peak_allocated;
 };
 
 /**
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index addb096edac5..68b23693418b 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -277,6 +277,9 @@ static void drmcg_print_stats(struct drmcg_device_resource 
*ddr,
case DRMCG_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", ddr->bo_stats_total_allocated);
break;
+   case DRMCG_TYPE_BO_PEAK:
+   seq_printf(sf, "%lld\n", ddr->bo_stats_peak_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -325,6 +328,12 @@ struct cftype files[] = {
.private = DRMCG_CTF_PRIV(DRMCG_TYPE_BO_TOTAL,
DRMCG_FTYPE_STATS),
},
+   {
+   .name = "buffer.peak.stats",
+   .seq_show = drmcg_seq_show,
+   .private = DRMCG_CTF_PRIV(DRMCG_TYPE_BO_PEAK,
+   DRMCG_FTYPE_STATS),
+   },
{ } /* terminate */
 };
 
@@ -373,6 +382,9 @@ void drmcg_chg_bo_alloc(struct drmcg *drmcg, struct 
drm_device *dev,
ddr = drmcg->dev_resources[devIdx];
 
ddr->bo_stats_total_allocated += (s64)size;
+
+   if (ddr->bo_stats_peak_allocated < (s64)size)
+   ddr->bo_stats_peak_allocated = (s64)size;
}
mutex_unlock(&dev->drmcg_mutex);
 }
-- 
2.25.0



[PATCH v2 09/11] drm, cgroup: Add compute as gpu cgroup resource

2020-02-26 Thread Kenny Ho
gpu.compute.weight
  A read-write flat-keyed file which exists on all cgroups.  The
  default weight is 100.  Each entry is keyed by the DRM device's
  major:minor (the primary minor).  The weights are in the range [1,
  10000] and specify the relative amount of physical partitions
  the cgroup can use in relation to its siblings.  The partition
  concept here is analogous to the subdevice of OpenCL.

gpu.compute.effective
  A read-only nested-keyed file which exists on all cgroups.  Each
  entry is keyed by the DRM device's major:minor.

  It lists the GPU subdevices that are actually granted to this
  cgroup by its parent.  These subdevices are allowed to be used by
  tasks within the current cgroup.

  =====  ======================================
  count  The total number of granted subdevices
  list   Enumeration of the subdevices
  =====  ======================================
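
As a rough mental model (an illustration only; the code in this patch
actually distributes named partitions as bitmaps and walks the whole
hierarchy), an active child's effective share of its parent's
partitions is proportional to its weight relative to its active
siblings:

/* illustrative only: proportional share of the parent's partitions */
static int effective_count_sketch(int parent_count, s64 weight,
				  s64 active_sibling_weight_sum)
{
	return (int)div64_s64((s64)parent_count * weight,
			      active_sibling_weight_sum);
}

So a cgroup with weight 500 next to a single active sibling with weight
100 would end up with roughly 5/6 of the parent's partitions.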

Change-Id: Idde0ef9a331fd67bb9c7eb8ef9978439e6452488
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst |  21 +++
 include/drm/drm_cgroup.h|   3 +
 include/linux/cgroup_drm.h  |  16 +++
 kernel/cgroup/drm.c | 177 +++-
 4 files changed, 215 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 581343472651..f92f1f4a64d4 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2126,6 +2126,27 @@ GPU Interface Files
Set largest allocation for /dev/dri/card1 to 4MB
echo "226:1 4m" > gpu.buffer.peak.max
 
+  gpu.compute.weight
+   A read-write flat-keyed file which exists on all cgroups.  The
+   default weight is 100.  Each entry is keyed by the DRM device's
+   major:minor (the primary minor).  The weights are in the range
+   [1, 10000] and specify the relative amount of physical partitions
+   the cgroup can use in relation to its siblings.  The partition
+   concept here is analogous to the subdevice concept of OpenCL.
+
+  gpu.compute.effective
+   A read-only nested-keyed file which exists on all cgroups.
+   Each entry is keyed by the DRM device's major:minor.
+
+   It lists the GPU subdevices that are actually granted to this
+   cgroup by its parent.  These subdevices are allowed to be used
+   by tasks within the current cgroup.
+
+   =====  ======================================
+   count  The total number of granted subdevices
+   list   Enumeration of the subdevices
+   =====  ======================================
+
 GEM Buffer Ownership
 
 
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 2b41d4d22e33..5aac47ca536f 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -17,6 +17,9 @@ struct drmcg_props {
 
s64 bo_limits_total_allocated_default;
s64 bo_limits_peak_allocated_default;
+
+   int compute_capacity;
+   DECLARE_BITMAP(compute_slots, MAX_DRMCG_COMPUTE_CAPACITY);
 };
 
 void drmcg_bind(struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index aba3b26718c0..fd02f59cabab 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -11,10 +11,14 @@
 /* limit defined per the way drm_minor_alloc operates */
 #define MAX_DRM_DEV (64 * DRM_MINOR_RENDER)
 
+#define MAX_DRMCG_COMPUTE_CAPACITY 256
+
 enum drmcg_res_type {
DRMCG_TYPE_BO_TOTAL,
DRMCG_TYPE_BO_PEAK,
DRMCG_TYPE_BO_COUNT,
+   DRMCG_TYPE_COMPUTE,
+   DRMCG_TYPE_COMPUTE_EFF,
__DRMCG_TYPE_LAST,
 };
 
@@ -32,6 +36,18 @@ struct drmcg_device_resource {
s64 bo_limits_peak_allocated;
 
s64 bo_stats_count_allocated;
+
+/* compute_stg is used to calculate _eff before applying to _eff
+* after considering the entire hierarchy
+*/
+   DECLARE_BITMAP(compute_stg, MAX_DRMCG_COMPUTE_CAPACITY);
+   /* user configurations */
+   s64 compute_weight;
+   /* effective compute for the cgroup after considering
+* relationship with other cgroup
+*/
+   s64 compute_count_eff;
+   DECLARE_BITMAP(compute_eff, MAX_DRMCG_COMPUTE_CAPACITY);
 };
 
 /**
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 62d2a9d33d0c..2eadabebdfea 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -98,6 +99,11 @@ static inline int init_drmcg_single(struct drmcg *drmcg, 
struct drm_device *dev)
ddr->bo_limit

[PATCH v2 03/11] drm, cgroup: Initialize drmcg properties

2020-02-26 Thread Kenny Ho
drmcg initialization involves allocating a per cgroup, per device data
structure and setting the defaults.  There are two entry points for
drmcg init:

1) When struct drmcg is created via css_alloc, initialization is done
for each device

2) When DRM devices are created after drmcgs are created
  a) Per device drmcg data structure is allocated at the beginning of
  DRM device creation such that drmcg can begin tracking usage
  statistics
  b) At the end of DRM device creation, drmcg_register_dev will update the
  existing cgroups in case device specific defaults need to be applied.

Entry point #2 usually applies to the root cgroup since it can be
created before DRM devices are available.  The drmcg controller will go
through all existing drm cgroups and initialize them with the new device
accordingly.
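
Sketched out, entry point #2b could look roughly like the following.
This is illustrative only: drmcg_custom_init is the driver callback
added by this patch, while the final tree walk is compressed into a
single call rather than spelled out.

/* illustrative sketch of entry point 2b */
void drmcg_register_dev_sketch(struct drm_device *dev)
{
	/* let the driver override the built-in per-device defaults */
	if (dev->driver->drmcg_custom_init)
		dev->driver->drmcg_custom_init(dev, &dev->drmcg_props);

	/* then propagate the defaults to every drmcg that already exists */
	drmcg_update_cg_tree(dev);
}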

Change-Id: I64e421d8dfcc22ee8282cc1305960e20c2704db7
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/drm_drv.c  |   4 ++
 include/drm/drm_cgroup.h   |  18 +++
 include/drm/drm_device.h   |   7 +++
 include/drm/drm_drv.h  |   9 
 include/linux/cgroup_drm.h |  12 +
 kernel/cgroup/drm.c| 105 +
 6 files changed, 155 insertions(+)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index e418a61f5c85..e10bd42ebdba 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -644,6 +644,7 @@ int drm_dev_init(struct drm_device *dev,
mutex_init(&dev->filelist_mutex);
mutex_init(&dev->clientlist_mutex);
mutex_init(&dev->master_mutex);
+   mutex_init(&dev->drmcg_mutex);
 
dev->anon_inode = drm_fs_inode_new();
if (IS_ERR(dev->anon_inode)) {
@@ -680,6 +681,7 @@ int drm_dev_init(struct drm_device *dev,
if (ret)
goto err_setunique;
 
+   drmcg_device_early_init(dev);
return 0;
 
 err_setunique:
@@ -694,6 +696,7 @@ int drm_dev_init(struct drm_device *dev,
drm_fs_inode_free(dev->anon_inode);
 err_free:
put_device(dev->dev);
+   mutex_destroy(&dev->drmcg_mutex);
mutex_destroy(&dev->master_mutex);
mutex_destroy(&dev->clientlist_mutex);
mutex_destroy(&dev->filelist_mutex);
@@ -770,6 +773,7 @@ void drm_dev_fini(struct drm_device *dev)
 
put_device(dev->dev);
 
+   mutex_destroy(&dev->drmcg_mutex);
mutex_destroy(&dev->master_mutex);
mutex_destroy(&dev->clientlist_mutex);
mutex_destroy(&dev->filelist_mutex);
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 530c9a0b3238..fda426fba035 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -4,8 +4,17 @@
 #ifndef __DRM_CGROUP_H__
 #define __DRM_CGROUP_H__
 
+#include 
+
 #ifdef CONFIG_CGROUP_DRM
 
+/**
+ * Per DRM device properties for DRM cgroup controller for the purpose
+ * of storing per device defaults
+ */
+struct drmcg_props {
+};
+
 void drmcg_bind(struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
void (*put_ddev)(struct drm_device *dev));
 
@@ -15,8 +24,13 @@ void drmcg_register_dev(struct drm_device *dev);
 
 void drmcg_unregister_dev(struct drm_device *dev);
 
+void drmcg_device_early_init(struct drm_device *device);
+
 #else
 
+struct drmcg_props {
+};
+
 static inline void drmcg_bind(
struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
void (*put_ddev)(struct drm_device *dev))
@@ -35,5 +49,9 @@ static inline void drmcg_unregister_dev(struct drm_device 
*dev)
 {
 }
 
+static inline void drmcg_device_early_init(struct drm_device *device)
+{
+}
+
 #endif /* CONFIG_CGROUP_DRM */
 #endif /* __DRM_CGROUP_H__ */
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index 1acfc3bbd3fb..a94598b8f670 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 struct drm_driver;
 struct drm_minor;
@@ -308,6 +309,12 @@ struct drm_device {
 */
struct drm_fb_helper *fb_helper;
 
+/** \name DRM Cgroup */
+   /*@{ */
+   struct mutex drmcg_mutex;
+   struct drmcg_props drmcg_props;
+   /*@} */
+
/* Everything below here is for legacy driver, never use! */
/* private: */
 #if IS_ENABLED(CONFIG_DRM_LEGACY)
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index cf13470810a5..1f65ac4d9bbf 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -715,6 +715,15 @@ struct drm_driver {
struct drm_device *dev,
uint32_t handle);
 
+   /**
+* @drmcg_custom_init
+*
+* Optional callback used to initialize drm cgroup per device properties
+* such as resource limit defaults.
+*/
+   void (*drmcg_custom_init)(struct drm_device *dev,
+   struct drmcg_props *props);
+
/**
 * @gem_vm_ops: Driver private ops for this object
 *
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 30

[PATCH v2 04/11] drm, cgroup: Add total GEM buffer allocation stats

2020-02-26 Thread Kenny Ho
The drm resource being measured here is the GEM buffer objects.  User
applications allocate and free these buffers.  In addition, a process
can allocate a buffer and share it with another process.  The consumer
of a shared buffer can also outlive the allocator of the buffer.

For the purpose of cgroup accounting and limiting, ownership of the
buffer is deemed to be the cgroup to which the allocating process
belongs.  There is one set of cgroup stats per drm device.  Each
allocation is charged to the owning cgroup as well as all its ancestors.

Similar to the memory cgroup, migrating a process to a different cgroup
does not move the GEM buffer usage that the process accumulated while in
the previous cgroup to the new cgroup.

The following is an example to illustrate some of the operations.  Given
the following cgroup hierarchy (The letters are cgroup names with R
being the root cgroup.  The numbers in brackets are processes.  The
processes are placed with cgroup's 'No Internal Process Constraint' in
mind, so no process is placed in cgroup B.)

R (4, 5) ---- A (6)
         \
          B ---- C (7, 8)
           \
            D (9)

Here is a list of operations and the associated effect on the sizes
tracked by the cgroups (for simplicity, each buffer is 1 unit in size;
a short sketch of the charge path follows the table.)

==  ==  ==  ==  ==  ===
R   A   B   C   D   Ops
==  ==  ==  ==  ==  ===
1   0   0   0   0   4 allocated a buffer
1   0   0   0   0   4 shared a buffer with 5
1   0   0   0   0   4 shared a buffer with 9
2   0   1   0   1   9 allocated a buffer
3   0   2   1   1   7 allocated a buffer
3   0   2   1   1   7 shared a buffer with 8
3   0   2   1   1   7 sharing with 9
3   0   2   1   1   7 release a buffer
3   0   2   1   1   7 migrate to cgroup D
3   0   2   1   1   9 release a buffer from 7
2   0   1   0   1   8 release a buffer from 7 (last ref to shared buf)
==  ==  ==  ==  ==  ===
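
The charge path behind the numbers above can be sketched as follows.
This is illustrative only: the helper names match the ones added in
this patch, but locking is omitted, and the way the owning cgroup is
remembered for the later uncharge is only described in the comment.

/* illustrative sketch: charging a new GEM buffer to its owner's cgroup */
static void account_new_bo_sketch(struct drm_device *dev, size_t size)
{
	/* the owner is the allocating task's cgroup at allocation time */
	struct drmcg *drmcg = drmcg_get(current);

	/* charges this cgroup and every ancestor up to the root */
	drmcg_chg_bo_alloc(drmcg, dev, size);

	/*
	 * The patch needs to remember the owning cgroup alongside the
	 * buffer so that the matching drmcg_unchg_bo_alloc() at free
	 * time hits the same cgroup even if the task has migrated or
	 * exited; this sketch simply drops the reference instead.
	 */
	drmcg_put(drmcg);
}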

gpu.buffer.stats
A read-only flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Total GEM buffer allocation in bytes.

Change-Id: Ibc1f646ca7dbc588e2d11802b156b524696a23e7
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst |  50 +-
 drivers/gpu/drm/drm_gem.c   |   9 ++
 include/drm/drm_cgroup.h|  16 +++
 include/drm/drm_gem.h   |  10 ++
 include/linux/cgroup_drm.h  |   6 ++
 kernel/cgroup/drm.c | 126 
 6 files changed, 216 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 7deff912185e..c041e672cc10 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -63,6 +63,7 @@ v1 is available under Documentation/admin-guide/cgroup-v1/.
5-7-1. RDMA Interface Files
  5-8. GPU
5-8-1. GPU Interface Files
+   5-8-2. GEM Buffer Ownership
  5-9. Misc
5-9-1. perf_event
  5-N. Non-normative information
@@ -2068,7 +2069,54 @@ of GPU-related resources.
 GPU Interface Files
 
 
-TODO
+  gpu.buffer.stats
+   A read-only flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Total GEM buffer allocation in bytes.
+
+GEM Buffer Ownership
+
+
+For the purpose of cgroup accounting and limiting, ownership of the
+buffer is deemed to be the cgroup to which the allocating process
+belongs.  There is one set of cgroup stats per drm device.  Each
+allocation is charged to the owning cgroup as well as all its ancestors.
+
+Similar to the memory cgroup, migrating a process to a different cgroup
+does not move the GEM buffer usage that the process accumulated while in
+the previous cgroup to the new cgroup.
+
+The following is an example to illustrate some of the operations.  Given
+the following cgroup hierarchy (The letters are cgroup names with R
+being the root cgroup.  The numbers in brackets are processes.  The
+processes are placed with cgroup's 'No Internal Process Constraint' in
+mind, so no process is placed in cgroup B.)
+
+R (4, 5) ---- A (6)
+         \
+          B ---- C (7, 8)
+           \
+            D (9)
+
+Here is a list of operations and the associated effect on the sizes
+tracked by the cgroups (for simplicity, each buffer is 1 unit in size.)
+
+==  ==  ==  ==  ==  ===
+R   A   B   C   D   Ops
+==  ==  ==  ==  ==  ===
+1   0   0   0   0   4 allocated a buffer
+1   0   0   0   0   4 shared a buffer with 5
+1   0   0   0   0   4 shared a buffer with 9
+2   0   1   0   1   9 allocated a buffer
+3   0   2   1   1   7 allocated a buffer
+3   0   2   1   1   7 shared a buffer with 8
+3   0   2   1   1   7 sharing with 9
+3   0   2   1   1   7 release a buffer
+3

[PATCH v2 01/11] cgroup: Introduce cgroup for drm subsystem

2020-02-26 Thread Kenny Ho
With the increased importance of machine learning, data science and
other cloud-based applications, GPUs are already in production use in
data centers today.  Existing GPU resource management is very
coarse-grained, however, as sysadmins are only able to distribute
workload on a per-GPU basis.
without SRIOV) but it generally acts on the entire GPU instead of the
specific resources in a GPU.  With a drm cgroup controller, we can
enable alternate, fine-grained, sub-GPU resource management (in addition
to what may be available via GPU virtualization.)

Change-Id: Ia90aed8c4cb89ff20d8216a903a765655b44fc9a
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst | 18 -
 Documentation/cgroup-v1/drm.rst |  1 +
 include/linux/cgroup_drm.h  | 92 +
 include/linux/cgroup_subsys.h   |  4 ++
 init/Kconfig|  5 ++
 kernel/cgroup/Makefile  |  1 +
 kernel/cgroup/drm.c | 42 +++
 7 files changed, 161 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/cgroup-v1/drm.rst
 create mode 100644 include/linux/cgroup_drm.h
 create mode 100644 kernel/cgroup/drm.c

diff --git a/Documentation/admin-guide/cgroup-v2.rst 
b/Documentation/admin-guide/cgroup-v2.rst
index 0636bcb60b5a..7deff912185e 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -61,8 +61,10 @@ v1 is available under Documentation/admin-guide/cgroup-v1/.
  5-6. Device
  5-7. RDMA
5-7-1. RDMA Interface Files
- 5-8. Misc
-   5-8-1. perf_event
+ 5-8. GPU
+   5-8-1. GPU Interface Files
+ 5-9. Misc
+   5-9-1. perf_event
  5-N. Non-normative information
5-N-1. CPU controller root cgroup process behaviour
5-N-2. IO controller root cgroup process behaviour
@@ -2057,6 +2059,18 @@ RDMA Interface Files
  ocrdma1 hca_handle=1 hca_object=23
 
 
+GPU
+---
+
+The "gpu" controller regulates the distribution and accounting of
+GPU-related resources.
+
+GPU Interface Files
+
+
+TODO
+
+
 Misc
 
 
diff --git a/Documentation/cgroup-v1/drm.rst b/Documentation/cgroup-v1/drm.rst
new file mode 100644
index ..5f5658e1f5ed
--- /dev/null
+++ b/Documentation/cgroup-v1/drm.rst
@@ -0,0 +1 @@
+Please see ../cgroup-v2.rst for details
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
new file mode 100644
index ..345af54a5d41
--- /dev/null
+++ b/include/linux/cgroup_drm.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+#ifndef _CGROUP_DRM_H
+#define _CGROUP_DRM_H
+
+#include 
+
+#ifdef CONFIG_CGROUP_DRM
+
+/**
+ * The DRM cgroup controller data structure.
+ */
+struct drmcg {
+   struct cgroup_subsys_state  css;
+};
+
+/**
+ * css_to_drmcg - get the corresponding drmcg ref from a cgroup_subsys_state
+ * @css: the target cgroup_subsys_state
+ *
+ * Return: DRM cgroup that contains the @css
+ */
+static inline struct drmcg *css_to_drmcg(struct cgroup_subsys_state *css)
+{
+   return css ? container_of(css, struct drmcg, css) : NULL;
+}
+
+/**
+ * drmcg_get - get the drmcg reference that a task belongs to
+ * @task: the target task
+ *
+ * This increase the reference count of the css that the @task belongs to
+ *
+ * Return: reference to the DRM cgroup the task belongs to
+ */
+static inline struct drmcg *drmcg_get(struct task_struct *task)
+{
+   return css_to_drmcg(task_get_css(task, gpu_cgrp_id));
+}
+
+/**
+ * drmcg_put - put a drmcg reference
+ * @drmcg: the target drmcg
+ *
+ * Put a reference obtained via drmcg_get
+ */
+static inline void drmcg_put(struct drmcg *drmcg)
+{
+   if (drmcg)
+   css_put(&drmcg->css);
+}
+
+/**
+ * drmcg_parent - find the parent of a drm cgroup
+ * @cg: the target drmcg
+ *
+ * This does not increase the reference count of the parent cgroup
+ *
+ * Return: parent DRM cgroup of @cg
+ */
+static inline struct drmcg *drmcg_parent(struct drmcg *cg)
+{
+   return css_to_drmcg(cg->css.parent);
+}
+
+#else /* CONFIG_CGROUP_DRM */
+
+struct drmcg {
+};
+
+static inline struct drmcg *css_to_drmcg(struct cgroup_subsys_state *css)
+{
+   return NULL;
+}
+
+static inline struct drmcg *drmcg_get(struct task_struct *task)
+{
+   return NULL;
+}
+
+static inline void drmcg_put(struct drmcg *drmcg)
+{
+}
+
+static inline struct drmcg *drmcg_parent(struct drmcg *cg)
+{
+   return NULL;
+}
+
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* _CGROUP_DRM_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index acb77dcff3b4..f4e627942115 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -61,6 +61,10 @@ SUBSYS(pids)
 SUBSYS(rdma)
 #endif
 
+#if IS_ENABLED(CONFIG_CGROUP_DRM)
+SUBSYS(gpu)
+#endif
+
 /*
  * The following subsystems are not supporte

[PATCH v2 00/11] new cgroup controller for gpu/drm subsystem

2020-02-26 Thread Kenny Ho
 instead of the specific resources in a GPU.
With a drm cgroup controller, we can enable alternate, fine-grained, sub-GPU
resource management (in addition to what may be available via GPU
virtualization.)

In addition to production use, the DRM cgroup can also help with testing
graphics application robustness by providing a means to artificially limit DRM
resources available to the applications.


Challenges
==
While there is common infrastructure in DRM that is shared across many vendors
(the scheduler [4] for example), there are also aspects of DRM that are vendor
specific.  To accommodate this, we borrowed the mechanism the cgroup subsystem
uses to handle different kinds of cgroup controllers.

Resources for DRM are also often device (GPU) specific instead of system
specific and a system may contain more than one GPU.  For this, we borrowed
some of the ideas from RDMA cgroup controller.

Approach

To experiment with the idea of a DRM cgroup, we would like to start with basic
accounting and statistics, then continue to iterate and add regulating
mechanisms into the driver.

[1] https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
[2] https://lists.freedesktop.org/archives/intel-gfx/2018-January/153156.html
[3] https://www.spinics.net/lists/cgroups/msg20720.html
[4] https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/scheduler
[5] https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
[6] 
https://blog.openshift.com/gpu-accelerated-sql-queries-with-postgresql-pg-strom-in-openshift-3-10/
[7] https://github.com/RadeonOpenCompute/k8s-device-plugin
[8] https://github.com/kubernetes/kubernetes/issues/52757

Kenny Ho (11):
  cgroup: Introduce cgroup for drm subsystem
  drm, cgroup: Bind drm and cgroup subsystem
  drm, cgroup: Initialize drmcg properties
  drm, cgroup: Add total GEM buffer allocation stats
  drm, cgroup: Add peak GEM buffer allocation stats
  drm, cgroup: Add GEM buffer allocation count stats
  drm, cgroup: Add total GEM buffer allocation limit
  drm, cgroup: Add peak GEM buffer allocation limit
  drm, cgroup: Add compute as gpu cgroup resource
  drm, cgroup: add update trigger after limit change
  drm/amdgpu: Integrate with DRM cgroup

 Documentation/admin-guide/cgroup-v2.rst   | 138 ++-
 Documentation/cgroup-v1/drm.rst   |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  48 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |   7 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 +
 .../amd/amdkfd/kfd_process_queue_manager.c| 153 +++
 drivers/gpu/drm/drm_drv.c |  12 +
 drivers/gpu/drm/drm_gem.c |  16 +-
 include/drm/drm_cgroup.h  |  81 ++
 include/drm/drm_device.h  |   7 +
 include/drm/drm_drv.h |  19 +
 include/drm/drm_gem.h |  12 +-
 include/linux/cgroup_drm.h| 138 +++
 include/linux/cgroup_subsys.h |   4 +
 init/Kconfig  |   5 +
 kernel/cgroup/Makefile|   1 +
 kernel/cgroup/drm.c   | 913 ++
 19 files changed, 1563 insertions(+), 5 deletions(-)
 create mode 100644 Documentation/cgroup-v1/drm.rst
 create mode 100644 include/drm/drm_cgroup.h
 create mode 100644 include/linux/cgroup_drm.h
 create mode 100644 kernel/cgroup/drm.c

-- 
2.25.0



[PATCH v2 02/11] drm, cgroup: Bind drm and cgroup subsystem

2020-02-26 Thread Kenny Ho
Since the drm subsystem can be compiled as a module and drm devices can
be added and removed during run time, add several functions to bind the
drm subsystem as well as drm devices with drmcg.

Two pairs of functions:
drmcg_bind/drmcg_unbind - used to bind/unbind the drm subsystem to the
cgroup subsystem as the drm core initialize/exit.

drmcg_register_dev/drmcg_unregister_dev - used to register/unregister
drm devices to the cgroup subsystem as the devices are presented/removed
from userspace.

Change-Id: I1cb6b2080fc7d27979d886ef23e784341efafb41
---
 drivers/gpu/drm/drm_drv.c  |   8 +++
 include/drm/drm_cgroup.h   |  39 +++
 include/linux/cgroup_drm.h |   4 ++
 kernel/cgroup/drm.c| 131 +
 4 files changed, 182 insertions(+)
 create mode 100644 include/drm/drm_cgroup.h

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 7c18a980cd4b..e418a61f5c85 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -973,6 +974,8 @@ int drm_dev_register(struct drm_device *dev, unsigned long 
flags)
 
ret = 0;
 
+   drmcg_register_dev(dev);
+
DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
 driver->name, driver->major, driver->minor,
 driver->patchlevel, driver->date,
@@ -1007,6 +1010,8 @@ EXPORT_SYMBOL(drm_dev_register);
  */
 void drm_dev_unregister(struct drm_device *dev)
 {
+   drmcg_unregister_dev(dev);
+
if (drm_core_check_feature(dev, DRIVER_LEGACY))
drm_lastclose(dev);
 
@@ -1113,6 +1118,7 @@ static const struct file_operations drm_stub_fops = {
 
 static void drm_core_exit(void)
 {
+   drmcg_unbind();
unregister_chrdev(DRM_MAJOR, "drm");
debugfs_remove(drm_debugfs_root);
drm_sysfs_destroy();
@@ -1139,6 +1145,8 @@ static int __init drm_core_init(void)
if (ret < 0)
goto error;
 
+   drmcg_bind(&drm_minor_acquire, &drm_dev_put);
+
drm_core_init_complete = true;
 
DRM_DEBUG("Initialized\n");
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
new file mode 100644
index ..530c9a0b3238
--- /dev/null
+++ b/include/drm/drm_cgroup.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+#ifndef __DRM_CGROUP_H__
+#define __DRM_CGROUP_H__
+
+#ifdef CONFIG_CGROUP_DRM
+
+void drmcg_bind(struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
+   void (*put_ddev)(struct drm_device *dev));
+
+void drmcg_unbind(void);
+
+void drmcg_register_dev(struct drm_device *dev);
+
+void drmcg_unregister_dev(struct drm_device *dev);
+
+#else
+
+static inline void drmcg_bind(
+   struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
+   void (*put_ddev)(struct drm_device *dev))
+{
+}
+
+static inline void drmcg_unbind(void)
+{
+}
+
+static inline void drmcg_register_dev(struct drm_device *dev)
+{
+}
+
+static inline void drmcg_unregister_dev(struct drm_device *dev)
+{
+}
+
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* __DRM_CGROUP_H__ */
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 345af54a5d41..307bb75db248 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -5,6 +5,10 @@
 #define _CGROUP_DRM_H
 
 #include 
+#include 
+
+/* limit defined per the way drm_minor_alloc operates */
+#define MAX_DRM_DEV (64 * DRM_MINOR_RENDER)
 
 #ifdef CONFIG_CGROUP_DRM
 
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 5e38a8230922..061bb9c458e4 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -1,11 +1,142 @@
 // SPDX-License-Identifier: MIT
 // Copyright 2019 Advanced Micro Devices, Inc.
+#include 
+#include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 static struct drmcg *root_drmcg __read_mostly;
 
+/* global mutex for drmcg across all devices */
+static DEFINE_MUTEX(drmcg_mutex);
+
+static DECLARE_BITMAP(known_devs, MAX_DRM_DEV);
+
+static struct drm_minor (*(*acquire_drm_minor)(unsigned int minor_id));
+
+static void (*put_drm_dev)(struct drm_device *dev);
+
+/**
+ * drmcg_bind - Bind DRM subsystem to cgroup subsystem
+ * @acq_dm: function pointer to the drm_minor_acquire function
+ * @put_ddev: function pointer to the drm_dev_put function
+ *
+ * This function binds some functions from the DRM subsystem and make
+ * them available to the drmcg subsystem.
+ *
+ * drmcg_unbind does the opposite of this function
+ */
+void drmcg_bind(struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
+   void (*put_ddev)(struct drm_device *dev))
+{
+   mutex_lock(&drmcg_mutex);
+   acquire_drm_minor = acq_dm;
+   put_drm_dev = put_ddev;
+   mutex_unlock(&drmcg_mutex);
+}
+EXPORT_SYMBOL(drmcg_bind);
+
+/**
+ * drmcg_unbind - Unbind DRM subsystem from cgroup subsystem
+ *
+ * drmcg_bind 

[PATCH v2 10/11] drm, cgroup: add update trigger after limit change

2020-02-26 Thread Kenny Ho
Before this commit, drmcg limits are updated but enforcement is delayed
until the next time the driver checks against the new limit.  While this
is sufficient for certain resources, a more proactive enforcement may be
needed for other resources.

Introducing an optional drmcg_limit_updated callback for the DRM
drivers.  When defined, it will be called in two scenarios:
1) When limits are updated for a particular cgroup, the callback will be
triggered for each task in the updated cgroup.
2) When a task is migrated from one cgroup to another, the callback will
be triggered for each resource type for the migrated task.

Change-Id: I0ce7c4e5a04c31bd0f8d9853a383575d4bc9a3fa
Signed-off-by: Kenny Ho 
---
 include/drm/drm_drv.h | 10 
 kernel/cgroup/drm.c   | 58 +++
 2 files changed, 68 insertions(+)

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 1f65ac4d9bbf..e7333143e722 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -724,6 +724,16 @@ struct drm_driver {
void (*drmcg_custom_init)(struct drm_device *dev,
struct drmcg_props *props);
 
+   /**
+* @drmcg_limit_updated
+*
+* Optional callback
+*/
+   void (*drmcg_limit_updated)(struct drm_device *dev,
+   struct task_struct *task,
+   struct drmcg_device_resource *ddr,
+   enum drmcg_res_type res_type);
+
/**
 * @gem_vm_ops: Driver private ops for this object
 *
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 2eadabebdfea..da439a351b07 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -127,6 +127,26 @@ static inline void drmcg_update_cg_tree(struct drm_device 
*dev)
mutex_unlock(_mutex);
 }
 
+static void drmcg_limit_updated(struct drm_device *dev, struct drmcg *drmcg,
+   enum drmcg_res_type res_type)
+{
+   struct drmcg_device_resource *ddr =
+   drmcg->dev_resources[dev->primary->index];
+   struct css_task_iter it;
+   struct task_struct *task;
+
+   if (dev->driver->drmcg_limit_updated == NULL)
+   return;
+
+   css_task_iter_start(&drmcg->css.cgroup->self,
+   CSS_TASK_ITER_PROCS, &it);
+   while ((task = css_task_iter_next(&it))) {
+   dev->driver->drmcg_limit_updated(dev, task,
+   ddr, res_type);
+   }
+   css_task_iter_end(&it);
+}
+
 static void drmcg_calculate_effective_compute(struct drm_device *dev,
const unsigned long *free_weighted,
struct drmcg *parent_drmcg)
@@ -208,6 +228,8 @@ static void drmcg_apply_effective_compute(struct drm_device 
*dev)
 capacity);
ddr->compute_count_eff =
bitmap_weight(ddr->compute_eff, capacity);
+
+   drmcg_limit_updated(dev, drmcg, DRMCG_TYPE_COMPUTE);
}
}
rcu_read_unlock();
@@ -732,10 +754,46 @@ static int drmcg_css_online(struct cgroup_subsys_state 
*css)
return drm_minor_for_each(_online_fn, css_to_drmcg(css));
 }
 
+static int drmcg_attach_fn(int id, void *ptr, void *data)
+{
+   struct drm_minor *minor = ptr;
+   struct task_struct *task = data;
+   struct drm_device *dev;
+
+   if (minor->type != DRM_MINOR_PRIMARY)
+   return 0;
+
+   dev = minor->dev;
+
+   if (dev->driver->drmcg_limit_updated) {
+   struct drmcg *drmcg = drmcg_get(task);
+   struct drmcg_device_resource *ddr =
+   drmcg->dev_resources[minor->index];
+   enum drmcg_res_type type;
+
+   for (type = 0; type < __DRMCG_TYPE_LAST; type++)
+   dev->driver->drmcg_limit_updated(dev, task, ddr, type);
+
+   drmcg_put(drmcg);
+   }
+
+   return 0;
+}
+
+static void drmcg_attach(struct cgroup_taskset *tset)
+{
+   struct task_struct *task;
+   struct cgroup_subsys_state *css;
+
+   cgroup_taskset_for_each(task, css, tset)
+   drm_minor_for_each(&drmcg_attach_fn, task);
+}
+
 struct cgroup_subsys gpu_cgrp_subsys = {
.css_alloc  = drmcg_css_alloc,
.css_free   = drmcg_css_free,
.css_online = drmcg_css_online,
+   .attach = drmcg_attach,
.early_init = false,
.legacy_cftypes = files,
.dfl_cftypes= files,
-- 
2.25.0



Re: [PATCH 09/11] drm, cgroup: Introduce lgpu as DRM cgroup resource

2020-02-19 Thread Kenny Ho
On Wed, Feb 19, 2020 at 11:18 AM Johannes Weiner  wrote:
>
> Yes, I'd go with absolute units when it comes to memory, because it's
> not a renewable resource like CPU and IO, and so we do have cliff
> behavior around the edge where you transition from ok to not-enough.
>
> memory.low is a bit in flux right now, so if anything is unclear
> around its semantics, please feel free to reach out.

I am not familiar with the discussion, would you point me to a
relevant thread please?  In addition, is there some kind of order of
preference for implementing low vs high vs max?

Regards,
Kenny


Re: [PATCH 09/11] drm, cgroup: Introduce lgpu as DRM cgroup resource

2020-02-20 Thread Kenny Ho
Thanks, I will take a look.

Regards,
Kenny

On Wed, Feb 19, 2020 at 1:38 PM Johannes Weiner  wrote:
>
> On Wed, Feb 19, 2020 at 11:28:48AM -0500, Kenny Ho wrote:
> > On Wed, Feb 19, 2020 at 11:18 AM Johannes Weiner  wrote:
> > >
> > > Yes, I'd go with absolute units when it comes to memory, because it's
> > > not a renewable resource like CPU and IO, and so we do have cliff
> > > behavior around the edge where you transition from ok to not-enough.
> > >
> > > memory.low is a bit in flux right now, so if anything is unclear
> > > around its semantics, please feel free to reach out.
> >
> > I am not familiar with the discussion, would you point me to a
> > relevant thread please?
>
> Here is a cleanup patch, not yet merged, that documents the exact
> semantics and behavioral considerations:
>
> https://lore.kernel.org/linux-mm/20191213192158.188939-3-han...@cmpxchg.org/
>
> But the high-level idea is this: you assign each cgroup or cgroup
> subtree a chunk of the resource that it's guaranteed to be able to
> consume. It *can* consume beyond that threshold if available, but that
> overage may get reclaimed again if somebody else needs it instead.
>
> This allows you to do a ballpark distribution of the resource between
> different workloads, while the kernel retains the ability to optimize
> allocation of spare resources - because in practice, workload demand
> varies over time, workloads disappear and new ones start up etc.
>
> > In addition, is there some kind of order of preference for
> > implementing low vs high vs max?
>
> If you implement only one allocation model, the preference would be on
> memory.low. Limits are rigid and per definition waste resources, so in
> practice we're moving away from them.


Re: [PATCH 09/11] drm, cgroup: Introduce lgpu as DRM cgroup resource

2020-02-14 Thread Kenny Ho
On Fri, Feb 14, 2020 at 1:34 PM Daniel Vetter  wrote:
>
> I think guidance from Tejun in previos discussions was pretty clear that
> he expects cgroups to be both a) standardized and c) sufficient clear
> meaning that end-users have a clear understanding of what happens when
> they change the resource allocation.
>
> I'm not sure lgpu here, at least as specified, passes either.

I disagree (at least on the characterization of the feedback
provided.)  I believe this series satisfies the spirit of Tejun's
guidance so far (the weight knob for lgpu, for example, was
specifically implemented based on his input.)  But, I will let Tejun
speak for himself after he considered the implementation in detail.

Regards,
Kenny


> But I also
> don't have much clue, so pulled Jason in - he understands how this all
> gets reflected to userspace apis a lot better than me.
> -Daniel
>
>
> >
> > > If it's carving up compute power, what's actually being carved up?  Time? 
> > >  Execution units/waves/threads?  Even if that's the case, what advantage 
> > > does it give to have it in terms of a fixed set of lgpus where each 
> > > cgroup gets to pick a fixed set.  Does affinity matter that much?  Why 
> > > not just say how many waves the GPU supports and that they have to be 
> > > allocated in chunks of 16 waves (pulling a number out of thin air) and 
> > > let the cgroup specify how many waves it wants.
> > >
> > > Don't get me wrong here.  I'm all for the notion of being able to use 
> > > cgroups to carve up GPU compute resources.  However, this sounds to me 
> > > like the most AMD-specific solution possible.  We (Intel) could probably 
> > > do some sort of carving up as well but we'd likely want to do it with 
> > > preemption and time-slicing rather than handing out specific EUs.
> >
> > This has been discussed in the RFC before
> > (https://www.spinics.net/lists/cgroups/msg23469.html.)  As mentioned
> > before, the idea of a compute unit is hardly an AMD specific thing as
> > it is in the OpenCL standard and part of the architecture of many
> > different vendors.  In addition, the interface presented here supports
> > Intel's use case.  What you described is what I considered as the
> > "anonymous resources" view of the lgpu.  What you/Intel can do, is to
> > register your device to drmcg to have 100 lgpu and users can specify
> > simply by count.  So if they want to allocate 5% for a cgroup, they
> > would set count=5.  Per the documentation in this patch: "Some DRM
> > devices may only support lgpu as anonymous resources.  In such case,
> > the significance of the position of the set bits in list will be
> > ignored."  What Intel does with the user expressed configuration of "5
> > out of 100" is entirely up to Intel (time slice if you like, change to
> > specific EUs later if you like, or make it driver configurable to
> > support both if you like.)
> >
> > Regards,
> > Kenny
> >
> > >
> > > On Fri, Feb 14, 2020 at 9:57 AM Kenny Ho  wrote:
> > >>
> > >> drm.lgpu
> > >>   A read-write nested-keyed file which exists on all cgroups.
> > >>   Each entry is keyed by the DRM device's major:minor.
> > >>
> > >>   lgpu stands for logical GPU, it is an abstraction used to
> > >>   subdivide a physical DRM device for the purpose of resource
> > >>   management.  This file stores user configuration while the
> > >>   drm.lgpu.effective reflects the actual allocation after
> > >>   considering the relationship between the cgroups and their
> > >>   configurations.
> > >>
> > >>   The lgpu is a discrete quantity that is device specific (i.e.
> > >>   some DRM devices may have 64 lgpus while others may have 100
> > >>   lgpus.)  The lgpu is a single quantity that can be allocated
> > >>   in three different ways denoted by the following nested keys.
> > >>
> > >>     ======  ==============================================
> > >>     weight  Allocate by proportion in relationship with
> > >>             active sibling cgroups
> > >>     count   Allocate by amount statically, treat lgpu as
> > >>             anonymous resources
> > >>     list    Allocate statically, treat lgpu as named
> > >>             resource
> > >>     ======  ==============================================
> > >>
> > >

Re: [PATCH 09/11] drm, cgroup: Introduce lgpu as DRM cgroup resource

2020-02-14 Thread Kenny Ho
Hi Jason,

Thanks for the review.

On Fri, Feb 14, 2020 at 11:44 AM Jason Ekstrand  wrote:
>
> Pardon my ignorance but I'm a bit confused by this.  What is a "logical GPU"? 
>  What are we subdividing?  Are we carving up memory?  Compute power?  Both?

The intention is compute but it is up to the individual drm driver to decide.

> If it's carving up compute power, what's actually being carved up?  Time?
> Execution units/waves/threads?  Even if that's the case, what advantage does
> it give to have it in terms of a fixed set of lgpus where each cgroup gets to
> pick a fixed set?  Does affinity matter that much?  Why not just say how many
> waves the GPU supports and that they have to be allocated in chunks of 16
> waves (pulling a number out of thin air) and let the cgroup specify how many
> waves it wants?
>
> Don't get me wrong here.  I'm all for the notion of being able to use cgroups 
> to carve up GPU compute resources.  However, this sounds to me like the most 
> AMD-specific solution possible.  We (Intel) could probably do some sort of 
> carving up as well but we'd likely want to do it with preemption and 
> time-slicing rather than handing out specific EUs.

This has been discussed in the RFC before
(https://www.spinics.net/lists/cgroups/msg23469.html).  As mentioned
before, the idea of a compute unit is hardly an AMD-specific thing; it
is in the OpenCL standard and part of the architecture of many
different vendors.  In addition, the interface presented here supports
Intel's use case.  What you described is what I consider the
"anonymous resources" view of the lgpu.  What you/Intel can do is
register your device with drmcg as having 100 lgpus, and users can
specify an allocation simply by count.  So if they want to allocate 5%
for a cgroup, they would set count=5.  Per the documentation in this
patch: "Some DRM devices may only support lgpu as anonymous resources.
In such case, the significance of the position of the set bits in list
will be ignored."  What Intel does with the user-expressed
configuration of "5 out of 100" is entirely up to Intel (time-slice if
you like, change to specific EUs later if you like, or make it driver
configurable to support both if you like).
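
To make the "100 anonymous lgpus" idea concrete: in driver terms this
could be done from the per-device init callback added later in this
series (drmcg_custom_init in patch 03).  The sketch below is purely
illustrative; the lgpu_capacity field name is made up and is not part
of these patches.

	/* Illustrative only: a driver advertising 100 anonymous lgpus as
	 * its per-device drmcg default.  'lgpu_capacity' is a hypothetical
	 * property name used for this sketch.
	 */
	static void xyz_drmcg_custom_init(struct drm_device *dev,
					  struct drmcg_props *props)
	{
		props->lgpu_capacity = 100;
	}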

Regards,
Kenny

>
> On Fri, Feb 14, 2020 at 9:57 AM Kenny Ho  wrote:
>>
>> drm.lgpu
>>   A read-write nested-keyed file which exists on all cgroups.
>>   Each entry is keyed by the DRM device's major:minor.
>>
>>   lgpu stands for logical GPU; it is an abstraction used to
>>   subdivide a physical DRM device for the purpose of resource
>>   management.  This file stores user configuration while the
>>   drm.lgpu.effective reflects the actual allocation after
>>   considering the relationship between the cgroups and their
>>   configurations.
>>
>>   The lgpu is a discrete quantity that is device-specific (e.g.
>>   some DRM devices may have 64 lgpus while others may have 100
>>   lgpus.)  The lgpu is a single quantity that can be allocated
>>   in three different ways denoted by the following nested keys.
>>
>> = ==
>> weightAllocate by proportion in relationship with
>>   active sibling cgroups
>> count Allocate by amount statically, treat lgpu as
>>   anonymous resources
>> list  Allocate statically, treat lgpu as named
>>   resource
>> = ==
>>
>>   For example:
>>   226:0 weight=100 count=256 list=0-255
>>   226:1 weight=100 count=4 list=0,2,4,6
>>   226:2 weight=100 count=32 list=32-63
>>   226:3 weight=100 count=0 list=
>>   226:4 weight=500 count=0 list=
>>
>>   lgpu is represented by a bitmap and uses the bitmap_parselist
>>   kernel function so the list key input format is a
>>   comma-separated list of decimal numbers and ranges.
>>
>>   Consecutively set bits are shown as two hyphen-separated decimal
>>   numbers, the smallest and largest bit numbers set in the range.
>>   Optionally, each range can be postfixed to denote that only parts
>>   of it should be set.  The range will be divided into groups of a
>>   specific size.
>>   Syntax: range:used_size/group_size
>>   Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769
>>
>>   The count key is the hamming weight / hweight of the bitmap.
>>
>>   Weight, count and list accept the max and default keywords.
>>
>>   Some DRM devices may only support lgpu as anonym
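
As a rough illustration of the list/count semantics described above
(not part of this series), a controller could parse the list key with
the bitmap_parselist() kernel helper and derive count as the hamming
weight of the resulting bitmap.  LGPU_MAX and the function name below
are made up for this sketch:

	#include <linux/bitmap.h>

	#define LGPU_MAX 256	/* illustrative capacity, not from the patches */

	/* Parse a "list" string such as "0,2,4,6" or "32-63" into a bitmap
	 * and report its hamming weight as the equivalent "count" value.
	 */
	static int lgpu_parse_list(const char *buf, unsigned long *lgpu_bm,
				   unsigned int *count)
	{
		int rc;

		rc = bitmap_parselist(buf, lgpu_bm, LGPU_MAX);
		if (rc)
			return rc;

		*count = bitmap_weight(lgpu_bm, LGPU_MAX);
		return 0;
	}

For example, parsing "0-1023:2/256" with a large enough bitmap yields
bits 0,1,256,257,512,513,768,769 set and a count of 8.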

Re: [PATCH 09/11] drm, cgroup: Introduce lgpu as DRM cgroup resource

2020-02-14 Thread Kenny Ho
Hi Tejun,

On Fri, Feb 14, 2020 at 2:17 PM Tejun Heo  wrote:
>
> I have to agree with Daniel here. My apologies if I wasn't clear
> enough. Here's one interface I can think of:
>
>  * compute weight: The same format as io.weight. Proportional control
>of gpu compute.
>
>  * memory low: Please see how the system memory.low behaves. For gpus,
>it'll need per-device entries.
>
> Note that for both, there is one number to configure, and conceptually
> it's pretty clear to everybody what that number means.  That is not to
> say that it's easy to implement, but it's much better to deal with
> that on this side of the interface than the other.

Can you elaborate, per your understanding, on how the lgpu weight
attribute differs from the io.weight you suggested?  Is it merely a
formatting/naming issue, or are there implementation details that you
find troubling?  From my perspective, the weight attribute implements
what you suggested back in RFCv4 (proportional control on top of a
unit, either a physical or a time unit).
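
For concreteness (an illustration only, not code from this series), a
weight behaves like other cgroup weights: a cgroup's share of a
device's lgpu capacity is its weight divided by the sum of the weights
of its active siblings.  A minimal sketch, with made-up names:

	#include <linux/math64.h>

	/* Weight-proportional share of a device's lgpus among active
	 * sibling cgroups, in the style of cpu.weight / io.weight.
	 * E.g. 64 lgpus, weight 100 out of a total of 400 -> 16 lgpus.
	 */
	static u64 lgpu_weighted_share(u64 nr_lgpu, u64 my_weight,
				       u64 active_weight_sum)
	{
		if (!active_weight_sum)
			return 0;

		return div64_u64(nr_lgpu * my_weight, active_weight_sum);
	}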

Perhaps more explicit questions would help me understand what you
mean.  If I remove the 'list' and 'count' attributes, leaving just
weight, is that satisfactory?  Are you saying the idea of affinity or
named resources is banned from cgroup entirely, even though it already
exists in the form of cpuset and users (e.g. userspace OpenCL) are
interested in having such options when needed?

To be clear, I am not saying no to proportional control.  I am saying
give the user the options, which is what has been implemented.

> cc'ing Johannes. Do you have anything in mind regarding how gpu memory
> configuration should look? e.g. should it go w/ weights rather
> than absolute units?  (I don't think so, given that it'll most likely
> need limits at some point too, but there are benefits from
> staying consistent with system memory.)
>
> Also, a rather trivial high level question. Is drm a good controller
> name given that other controller names are like cpu, memory, io?

There was a discussion about naming early in the RFC (I believe
RFCv2); the consensus then was to use drmcg to align with the drm
subsystem.  I have no problem renaming it to gpucg or something
similar if that is the last thing blocking acceptance.  For
now, I would like to get some clarity on the implementation before
having more code churn.

Regards,
Kenny


> Thanks.
>
> --
> tejun


[PATCH 03/11] drm, cgroup: Initialize drmcg properties

2020-02-14 Thread Kenny Ho
drmcg initialization involves allocating a per-cgroup, per-device data
structure and setting the defaults.  There are two entry points for
drmcg init:

1) When struct drmcg is created via css_alloc, initialization is done
for each device

2) When DRM devices are created after drmcgs are created
  a) The per-device drmcg data structure is allocated at the beginning
  of DRM device creation so that drmcg can begin tracking usage
  statistics
  b) At the end of DRM device creation, drmcg_register_dev will update
  it in case device-specific defaults need to be applied.

Entry point #2 usually applies to the root cgroup since it can be
created before DRM devices are available.  The drmcg controller will go
through all existing drm cgroups and initialize them with the new device
accordingly.
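
Roughly, entry point #2 boils down to a walk like the sketch below
(simplified; the actual code in kernel/cgroup/drm.c handles locking
and allocation failures, and css_to_drmcg() plus the embedded css
member are assumed here rather than quoted from the patch):

	/* Sketch of entry point #2: when a DRM device shows up after some
	 * drm cgroups already exist, walk the existing cgroups and set up
	 * their per-device data with the (possibly driver-customized)
	 * defaults.  Locking and error handling omitted.
	 */
	static void drmcg_init_existing_cgroups(struct drm_device *dev)
	{
		struct cgroup_subsys_state *pos;

		css_for_each_descendant_pre(pos, &root_drmcg->css)
			init_drmcg_single(css_to_drmcg(pos), dev);
	}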

Change-Id: I64e421d8dfcc22ee8282cc1305960e20c2704db7
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/drm_drv.c  |   4 ++
 include/drm/drm_cgroup.h   |  18 +++
 include/drm/drm_device.h   |   7 +++
 include/drm/drm_drv.h  |   9 
 include/linux/cgroup_drm.h |  12 +
 kernel/cgroup/drm.c| 105 +
 6 files changed, 155 insertions(+)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 8e59cc5a5bde..44a66edc81c2 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -643,6 +643,7 @@ int drm_dev_init(struct drm_device *dev,
	mutex_init(&dev->filelist_mutex);
	mutex_init(&dev->clientlist_mutex);
	mutex_init(&dev->master_mutex);
+	mutex_init(&dev->drmcg_mutex);
 
dev->anon_inode = drm_fs_inode_new();
if (IS_ERR(dev->anon_inode)) {
@@ -679,6 +680,7 @@ int drm_dev_init(struct drm_device *dev,
if (ret)
goto err_setunique;
 
+   drmcg_device_early_init(dev);
return 0;
 
 err_setunique:
@@ -693,6 +695,7 @@ int drm_dev_init(struct drm_device *dev,
drm_fs_inode_free(dev->anon_inode);
 err_free:
put_device(dev->dev);
+	mutex_destroy(&dev->drmcg_mutex);
	mutex_destroy(&dev->master_mutex);
	mutex_destroy(&dev->clientlist_mutex);
	mutex_destroy(&dev->filelist_mutex);
@@ -769,6 +772,7 @@ void drm_dev_fini(struct drm_device *dev)
 
put_device(dev->dev);
 
+	mutex_destroy(&dev->drmcg_mutex);
	mutex_destroy(&dev->master_mutex);
	mutex_destroy(&dev->clientlist_mutex);
	mutex_destroy(&dev->filelist_mutex);
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 530c9a0b3238..fda426fba035 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -4,8 +4,17 @@
 #ifndef __DRM_CGROUP_H__
 #define __DRM_CGROUP_H__
 
+#include 
+
 #ifdef CONFIG_CGROUP_DRM
 
+/**
+ * Per DRM device properties for DRM cgroup controller for the purpose
+ * of storing per device defaults
+ */
+struct drmcg_props {
+};
+
 void drmcg_bind(struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
void (*put_ddev)(struct drm_device *dev));
 
@@ -15,8 +24,13 @@ void drmcg_register_dev(struct drm_device *dev);
 
 void drmcg_unregister_dev(struct drm_device *dev);
 
+void drmcg_device_early_init(struct drm_device *device);
+
 #else
 
+struct drmcg_props {
+};
+
 static inline void drmcg_bind(
struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
void (*put_ddev)(struct drm_device *dev))
@@ -35,5 +49,9 @@ static inline void drmcg_unregister_dev(struct drm_device *dev)
 {
 }
 
+static inline void drmcg_device_early_init(struct drm_device *device)
+{
+}
+
 #endif /* CONFIG_CGROUP_DRM */
 #endif /* __DRM_CGROUP_H__ */
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index 1acfc3bbd3fb..a94598b8f670 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -8,6 +8,7 @@
 
 #include 
 #include 
+#include 
 
 struct drm_driver;
 struct drm_minor;
@@ -308,6 +309,12 @@ struct drm_device {
 */
struct drm_fb_helper *fb_helper;
 
+/** \name DRM Cgroup */
+   /*@{ */
+   struct mutex drmcg_mutex;
+   struct drmcg_props drmcg_props;
+   /*@} */
+
/* Everything below here is for legacy driver, never use! */
/* private: */
 #if IS_ENABLED(CONFIG_DRM_LEGACY)
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index cf13470810a5..1f65ac4d9bbf 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -715,6 +715,15 @@ struct drm_driver {
struct drm_device *dev,
uint32_t handle);
 
+   /**
+* @drmcg_custom_init
+*
+* Optional callback used to initialize drm cgroup per device properties
+* such as resource limit defaults.
+*/
+   void (*drmcg_custom_init)(struct drm_device *dev,
+   struct drmcg_props *props);
+
/**
 * @gem_vm_ops: Driver private ops for this object
 *
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 85

[PATCH 02/11] drm, cgroup: Bind drm and cgroup subsystem

2020-02-14 Thread Kenny Ho
Since the drm subsystem can be compiled as a module and drm devices can
be added and removed at run time, add several functions to bind the
drm subsystem, as well as drm devices, to drmcg.

Two pairs of functions:
drmcg_bind/drmcg_unbind - used to bind/unbind the drm subsystem to/from
the cgroup subsystem as the drm core initializes/exits.

drmcg_register_dev/drmcg_unregister_dev - used to register/unregister
drm devices with the cgroup subsystem as the devices are presented
to/removed from userspace.

Change-Id: I1cb6b2080fc7d27979d886ef23e784341efafb41
---
 drivers/gpu/drm/drm_drv.c  |   8 +++
 include/drm/drm_cgroup.h   |  39 +++
 include/linux/cgroup_drm.h |   4 ++
 kernel/cgroup/drm.c| 131 +
 4 files changed, 182 insertions(+)
 create mode 100644 include/drm/drm_cgroup.h

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 1b9b40a1c7c9..8e59cc5a5bde 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -972,6 +973,8 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
 
ret = 0;
 
+   drmcg_register_dev(dev);
+
DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
 driver->name, driver->major, driver->minor,
 driver->patchlevel, driver->date,
@@ -1006,6 +1009,8 @@ EXPORT_SYMBOL(drm_dev_register);
  */
 void drm_dev_unregister(struct drm_device *dev)
 {
+   drmcg_unregister_dev(dev);
+
if (drm_core_check_feature(dev, DRIVER_LEGACY))
drm_lastclose(dev);
 
@@ -1112,6 +1117,7 @@ static const struct file_operations drm_stub_fops = {
 
 static void drm_core_exit(void)
 {
+   drmcg_unbind();
unregister_chrdev(DRM_MAJOR, "drm");
debugfs_remove(drm_debugfs_root);
drm_sysfs_destroy();
@@ -1138,6 +1144,8 @@ static int __init drm_core_init(void)
if (ret < 0)
goto error;
 
+	drmcg_bind(&drm_minor_acquire, &drm_dev_put);
+
drm_core_init_complete = true;
 
DRM_DEBUG("Initialized\n");
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
new file mode 100644
index ..530c9a0b3238
--- /dev/null
+++ b/include/drm/drm_cgroup.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ */
+#ifndef __DRM_CGROUP_H__
+#define __DRM_CGROUP_H__
+
+#ifdef CONFIG_CGROUP_DRM
+
+void drmcg_bind(struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
+   void (*put_ddev)(struct drm_device *dev));
+
+void drmcg_unbind(void);
+
+void drmcg_register_dev(struct drm_device *dev);
+
+void drmcg_unregister_dev(struct drm_device *dev);
+
+#else
+
+static inline void drmcg_bind(
+   struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
+   void (*put_ddev)(struct drm_device *dev))
+{
+}
+
+static inline void drmcg_unbind(void)
+{
+}
+
+static inline void drmcg_register_dev(struct drm_device *dev)
+{
+}
+
+static inline void drmcg_unregister_dev(struct drm_device *dev)
+{
+}
+
+#endif /* CONFIG_CGROUP_DRM */
+#endif /* __DRM_CGROUP_H__ */
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index ba7981ac3afc..854591bbb430 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -5,6 +5,10 @@
 #define _CGROUP_DRM_H
 
 #include 
+#include 
+
+/* limit defined per the way drm_minor_alloc operates */
+#define MAX_DRM_DEV (64 * DRM_MINOR_RENDER)
 
 #ifdef CONFIG_CGROUP_DRM
 
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index e97861b3cb30..37f98dc47268 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -1,11 +1,142 @@
 // SPDX-License-Identifier: MIT
 // Copyright 2019 Advanced Micro Devices, Inc.
+#include 
+#include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 static struct drmcg *root_drmcg __read_mostly;
 
+/* global mutex for drmcg across all devices */
+static DEFINE_MUTEX(drmcg_mutex);
+
+static DECLARE_BITMAP(known_devs, MAX_DRM_DEV);
+
+static struct drm_minor (*(*acquire_drm_minor)(unsigned int minor_id));
+
+static void (*put_drm_dev)(struct drm_device *dev);
+
+/**
+ * drmcg_bind - Bind DRM subsystem to cgroup subsystem
+ * @acq_dm: function pointer to the drm_minor_acquire function
+ * @put_ddev: function pointer to the drm_dev_put function
+ *
+ * This function binds some functions from the DRM subsystem and makes
+ * them available to the drmcg subsystem.
+ *
+ * drmcg_unbind does the opposite of this function
+ */
+void drmcg_bind(struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
+   void (*put_ddev)(struct drm_device *dev))
+{
+	mutex_lock(&drmcg_mutex);
+   acquire_drm_minor = acq_dm;
+   put_drm_dev = put_ddev;
+	mutex_unlock(&drmcg_mutex);
+}
+EXPORT_SYMBOL(drmcg_bind);
+
+/**
+ * drmcg_unbind - Unbind DRM subsystem from cgroup subsystem
+ *
+ * drmcg_bind 

[PATCH 08/11] drm, cgroup: Add peak GEM buffer allocation limit

2020-02-14 Thread Kenny Ho
drm.buffer.peak.default
A read-only flat-keyed file which exists on the root cgroup.
Each entry is keyed by the drm device's major:minor.

Default limits on the largest GEM buffer allocation in bytes.

drm.buffer.peak.max
A read-write flat-keyed file which exists on all cgroups.  Each
entry is keyed by the drm device's major:minor.

Per-device limits on the largest GEM buffer allocation in bytes.
This is a hard limit.  Attempts to allocate beyond the cgroup
limit will result in ENOMEM.  Shorthand understood by memparse
(such as k, m, g) can be used.

Set largest allocation for /dev/dri/card1 to 4MB
echo "226:1 4m" > drm.buffer.peak.max

Change-Id: I5ab3fb4a442b6cbd5db346be595897c90217da69
Signed-off-by: Kenny Ho 
---
 Documentation/admin-guide/cgroup-v2.rst | 18 +++
 include/drm/drm_cgroup.h|  1 +
 include/linux/cgroup_drm.h  |  1 +
 kernel/cgroup/drm.c | 43 +
 4 files changed, 63 insertions(+)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 064172df63e2..ce5dc027366a 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2102,6 +2102,24 @@ DRM Interface Files
Set allocation limit for /dev/dri/card0 to 512MB
echo "226:0 512m" > drm.buffer.total.max
 
+  drm.buffer.peak.default
+   A read-only flat-keyed file which exists on the root cgroup.
+   Each entry is keyed by the drm device's major:minor.
+
+   Default limits on the largest GEM buffer allocation in bytes.
+
+  drm.buffer.peak.max
+   A read-write flat-keyed file which exists on all cgroups.  Each
+   entry is keyed by the drm device's major:minor.
+
+   Per-device limits on the largest GEM buffer allocation in bytes.
+   This is a hard limit.  Attempts to allocate beyond the cgroup
+   limit will result in ENOMEM.  Shorthand understood by memparse
+   (such as k, m, g) can be used.
+
+   Set largest allocation for /dev/dri/card1 to 4MB
+   echo "226:1 4m" > drm.buffer.peak.max
+
 GEM Buffer Ownership
 
 
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 2783e56690db..2b41d4d22e33 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -16,6 +16,7 @@ struct drmcg_props {
	bool limit_enforced;
 
s64 bo_limits_total_allocated_default;
+   s64 bo_limits_peak_allocated_default;
 };
 
 void drmcg_bind(struct drm_minor (*(*acq_dm)(unsigned int minor_id)),
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index b03d90623763..eae400f3d9b4 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -29,6 +29,7 @@ struct drmcg_device_resource {
s64 bo_limits_total_allocated;
 
s64 bo_stats_peak_allocated;
+   s64 bo_limits_peak_allocated;
 
s64 bo_stats_count_allocated;
 };
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index ee85482edd90..5fcbbc13fa1c 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -95,6 +95,9 @@ static inline int init_drmcg_single(struct drmcg *drmcg, struct drm_device *dev)
ddr->bo_limits_total_allocated =
dev->drmcg_props.bo_limits_total_allocated_default;
 
+   ddr->bo_limits_peak_allocated =
+   dev->drmcg_props.bo_limits_peak_allocated_default;
+
return 0;
 }
 
@@ -305,6 +308,9 @@ static void drmcg_print_limits(struct drmcg_device_resource *ddr,
case DRMCG_TYPE_BO_TOTAL:
seq_printf(sf, "%lld\n", ddr->bo_limits_total_allocated);
break;
+   case DRMCG_TYPE_BO_PEAK:
+   seq_printf(sf, "%lld\n", ddr->bo_limits_peak_allocated);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -319,6 +325,10 @@ static void drmcg_print_default(struct drmcg_props *props,
seq_printf(sf, "%lld\n",
props->bo_limits_total_allocated_default);
break;
+   case DRMCG_TYPE_BO_PEAK:
+   seq_printf(sf, "%lld\n",
+   props->bo_limits_peak_allocated_default);
+   break;
default:
seq_puts(sf, "\n");
break;
@@ -476,6 +486,19 @@ static ssize_t drmcg_limit_write(struct kernfs_open_file *of, char *buf,
 
ddr->bo_limits_total_allocated = val;
break;
+   case DRMCG_TYPE_BO_PEAK:
+   rc = drmcg_process_limit_s64_val
