Re: [Freedreno] [PATCH 09/13] drm/msm: Use the DRM common Scheduler

2018-10-03 Thread Sharat Masetty
Thanks for the review comments Jordan. I tried to answer a few queries.. 
please check.


On 10/2/2018 12:32 AM, Jordan Crouse wrote:

On Mon, Oct 01, 2018 at 06:01:41PM +0530, Sharat Masetty wrote:

This patch hooks up the DRM gpu scheduler to the msm DRM driver. The
most noticeable changes to the driver are as follows. The submit path is
split into two parts, in the user context the submit(job) is created and
added to one of the entity's scheduler run queue. The scheduler then
tries to drain the queue by submitting the jobs the hardware to act upon.
The submit job sits on the scheduler queue until all the dependent
fences are waited upon successfully.

We have one scheduler instance per ring. The submitqueues will host a
scheduler entity object. This entity will be mapped to the scheduler's
default runqueue. This should be good for now, but in future it is possible
to extend the API to allow for scheduling amongst the submitqueues on the
same ring.

With this patch the role of the struct_mutex lock has been greatly reduced in
scope in the submit path, evidently when actually putting the job on the
ringbuffer. This should enable us with increased parallelism in the
driver which should translate to better performance overall hopefully.

Signed-off-by: Sharat Masetty 
---
  drivers/gpu/drm/msm/Kconfig   |   1 +
  drivers/gpu/drm/msm/Makefile  |   3 +-
  drivers/gpu/drm/msm/msm_drv.h |   3 +-
  drivers/gpu/drm/msm/msm_gem.c |   8 +-
  drivers/gpu/drm/msm/msm_gem.h |   6 +
  drivers/gpu/drm/msm/msm_gem_submit.c  | 138 +--
  drivers/gpu/drm/msm/msm_gpu.c |  72 ++--
  drivers/gpu/drm/msm/msm_gpu.h |   2 +
  drivers/gpu/drm/msm/msm_ringbuffer.c  |   7 +
  drivers/gpu/drm/msm/msm_ringbuffer.h  |   3 +
  drivers/gpu/drm/msm/msm_sched.c   | 313 ++
  drivers/gpu/drm/msm/msm_sched.h   |  18 ++
  drivers/gpu/drm/msm/msm_submitqueue.c |  18 +-
  13 files changed, 507 insertions(+), 85 deletions(-)
  create mode 100644 drivers/gpu/drm/msm/msm_sched.c
  create mode 100644 drivers/gpu/drm/msm/msm_sched.h

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 38cbde9..0d01810 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -15,6 +15,7 @@ config DRM_MSM
select SND_SOC_HDMI_CODEC if SND_SOC
select SYNC_FILE
select PM_OPP
+   select DRM_SCHED
default y
help
  DRM/KMS driver for MSM/snapdragon.
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index cd40c05..71ed921 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -60,7 +60,8 @@ msm-y := \
msm_perf.o \
msm_rd.o \
msm_ringbuffer.o \
-   msm_submitqueue.o
+   msm_submitqueue.o \
+   msm_sched.o
  
  msm-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o
  
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h

index b2da1fb..e461a9c 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -213,8 +213,7 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct 
drm_device *dev,
  int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv);
  int msm_gem_sync_object(struct drm_gem_object *obj,
struct msm_fence_context *fctx, bool exclusive);
-void msm_gem_move_to_active(struct drm_gem_object *obj,
-   struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence);
+void msm_gem_move_to_active(struct drm_gem_object *obj, struct msm_gpu *gpu);
  void msm_gem_move_to_inactive(struct drm_gem_object *obj);
  int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t 
*timeout);
  int msm_gem_cpu_fini(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index f583bb4..7a12f30 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -663,16 +663,12 @@ int msm_gem_sync_object(struct drm_gem_object *obj,
return 0;
  }
  
-void msm_gem_move_to_active(struct drm_gem_object *obj,

-   struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence)
+void msm_gem_move_to_active(struct drm_gem_object *obj, struct msm_gpu *gpu)
  {
struct msm_gem_object *msm_obj = to_msm_bo(obj);
WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
msm_obj->gpu = gpu;
-   if (exclusive)
-   reservation_object_add_excl_fence(msm_obj->resv, fence);
-   else
-   reservation_object_add_shared_fence(msm_obj->resv, fence);
+
list_del_init(_obj->mm_list);
list_add_tail(_obj->mm_list, >active_list);
  }
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index cae3aa5..8c6269f 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -20,6 +20,7 @@
  
  #include 

  #include 
+#include 
  #include "msm_drv.h"
  
  /* Additional 

Re: [Freedreno] [PATCH 09/13] drm/msm: Use the DRM common Scheduler

2018-10-01 Thread Jordan Crouse
On Mon, Oct 01, 2018 at 06:01:41PM +0530, Sharat Masetty wrote:
> This patch hooks up the DRM gpu scheduler to the msm DRM driver. The
> most noticeable changes to the driver are as follows. The submit path is
> split into two parts, in the user context the submit(job) is created and
> added to one of the entity's scheduler run queue. The scheduler then
> tries to drain the queue by submitting the jobs the hardware to act upon.
> The submit job sits on the scheduler queue until all the dependent
> fences are waited upon successfully.
> 
> We have one scheduler instance per ring. The submitqueues will host a
> scheduler entity object. This entity will be mapped to the scheduler's
> default runqueue. This should be good for now, but in future it is possible
> to extend the API to allow for scheduling amongst the submitqueues on the
> same ring.
> 
> With this patch the role of the struct_mutex lock has been greatly reduced in
> scope in the submit path, evidently when actually putting the job on the
> ringbuffer. This should enable us with increased parallelism in the
> driver which should translate to better performance overall hopefully.
> 
> Signed-off-by: Sharat Masetty 
> ---
>  drivers/gpu/drm/msm/Kconfig   |   1 +
>  drivers/gpu/drm/msm/Makefile  |   3 +-
>  drivers/gpu/drm/msm/msm_drv.h |   3 +-
>  drivers/gpu/drm/msm/msm_gem.c |   8 +-
>  drivers/gpu/drm/msm/msm_gem.h |   6 +
>  drivers/gpu/drm/msm/msm_gem_submit.c  | 138 +--
>  drivers/gpu/drm/msm/msm_gpu.c |  72 ++--
>  drivers/gpu/drm/msm/msm_gpu.h |   2 +
>  drivers/gpu/drm/msm/msm_ringbuffer.c  |   7 +
>  drivers/gpu/drm/msm/msm_ringbuffer.h  |   3 +
>  drivers/gpu/drm/msm/msm_sched.c   | 313 
> ++
>  drivers/gpu/drm/msm/msm_sched.h   |  18 ++
>  drivers/gpu/drm/msm/msm_submitqueue.c |  18 +-
>  13 files changed, 507 insertions(+), 85 deletions(-)
>  create mode 100644 drivers/gpu/drm/msm/msm_sched.c
>  create mode 100644 drivers/gpu/drm/msm/msm_sched.h
> 
> diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
> index 38cbde9..0d01810 100644
> --- a/drivers/gpu/drm/msm/Kconfig
> +++ b/drivers/gpu/drm/msm/Kconfig
> @@ -15,6 +15,7 @@ config DRM_MSM
>   select SND_SOC_HDMI_CODEC if SND_SOC
>   select SYNC_FILE
>   select PM_OPP
> + select DRM_SCHED
>   default y
>   help
> DRM/KMS driver for MSM/snapdragon.
> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
> index cd40c05..71ed921 100644
> --- a/drivers/gpu/drm/msm/Makefile
> +++ b/drivers/gpu/drm/msm/Makefile
> @@ -60,7 +60,8 @@ msm-y := \
>   msm_perf.o \
>   msm_rd.o \
>   msm_ringbuffer.o \
> - msm_submitqueue.o
> + msm_submitqueue.o \
> + msm_sched.o
>  
>  msm-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o
>  
> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> index b2da1fb..e461a9c 100644
> --- a/drivers/gpu/drm/msm/msm_drv.h
> +++ b/drivers/gpu/drm/msm/msm_drv.h
> @@ -213,8 +213,7 @@ struct drm_gem_object 
> *msm_gem_prime_import_sg_table(struct drm_device *dev,
>  int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv);
>  int msm_gem_sync_object(struct drm_gem_object *obj,
>   struct msm_fence_context *fctx, bool exclusive);
> -void msm_gem_move_to_active(struct drm_gem_object *obj,
> - struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence);
> +void msm_gem_move_to_active(struct drm_gem_object *obj, struct msm_gpu *gpu);
>  void msm_gem_move_to_inactive(struct drm_gem_object *obj);
>  int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t 
> *timeout);
>  int msm_gem_cpu_fini(struct drm_gem_object *obj);
> diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> index f583bb4..7a12f30 100644
> --- a/drivers/gpu/drm/msm/msm_gem.c
> +++ b/drivers/gpu/drm/msm/msm_gem.c
> @@ -663,16 +663,12 @@ int msm_gem_sync_object(struct drm_gem_object *obj,
>   return 0;
>  }
>  
> -void msm_gem_move_to_active(struct drm_gem_object *obj,
> - struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence)
> +void msm_gem_move_to_active(struct drm_gem_object *obj, struct msm_gpu *gpu)
>  {
>   struct msm_gem_object *msm_obj = to_msm_bo(obj);
>   WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
>   msm_obj->gpu = gpu;
> - if (exclusive)
> - reservation_object_add_excl_fence(msm_obj->resv, fence);
> - else
> - reservation_object_add_shared_fence(msm_obj->resv, fence);
> +
>   list_del_init(_obj->mm_list);
>   list_add_tail(_obj->mm_list, >active_list);
>  }
> diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
> index cae3aa5..8c6269f 100644
> --- a/drivers/gpu/drm/msm/msm_gem.h
> +++ b/drivers/gpu/drm/msm/msm_gem.h
> @@ -20,6 +20,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include "msm_drv.h"
>  
>  /* 

[Freedreno] [PATCH 09/13] drm/msm: Use the DRM common Scheduler

2018-10-01 Thread Sharat Masetty
This patch hooks up the DRM gpu scheduler to the msm DRM driver. The
most noticeable changes to the driver are as follows. The submit path is
split into two parts, in the user context the submit(job) is created and
added to one of the entity's scheduler run queue. The scheduler then
tries to drain the queue by submitting the jobs the hardware to act upon.
The submit job sits on the scheduler queue until all the dependent
fences are waited upon successfully.

We have one scheduler instance per ring. The submitqueues will host a
scheduler entity object. This entity will be mapped to the scheduler's
default runqueue. This should be good for now, but in future it is possible
to extend the API to allow for scheduling amongst the submitqueues on the
same ring.

With this patch the role of the struct_mutex lock has been greatly reduced in
scope in the submit path, evidently when actually putting the job on the
ringbuffer. This should enable us with increased parallelism in the
driver which should translate to better performance overall hopefully.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/Kconfig   |   1 +
 drivers/gpu/drm/msm/Makefile  |   3 +-
 drivers/gpu/drm/msm/msm_drv.h |   3 +-
 drivers/gpu/drm/msm/msm_gem.c |   8 +-
 drivers/gpu/drm/msm/msm_gem.h |   6 +
 drivers/gpu/drm/msm/msm_gem_submit.c  | 138 +--
 drivers/gpu/drm/msm/msm_gpu.c |  72 ++--
 drivers/gpu/drm/msm/msm_gpu.h |   2 +
 drivers/gpu/drm/msm/msm_ringbuffer.c  |   7 +
 drivers/gpu/drm/msm/msm_ringbuffer.h  |   3 +
 drivers/gpu/drm/msm/msm_sched.c   | 313 ++
 drivers/gpu/drm/msm/msm_sched.h   |  18 ++
 drivers/gpu/drm/msm/msm_submitqueue.c |  18 +-
 13 files changed, 507 insertions(+), 85 deletions(-)
 create mode 100644 drivers/gpu/drm/msm/msm_sched.c
 create mode 100644 drivers/gpu/drm/msm/msm_sched.h

diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 38cbde9..0d01810 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -15,6 +15,7 @@ config DRM_MSM
select SND_SOC_HDMI_CODEC if SND_SOC
select SYNC_FILE
select PM_OPP
+   select DRM_SCHED
default y
help
  DRM/KMS driver for MSM/snapdragon.
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index cd40c05..71ed921 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -60,7 +60,8 @@ msm-y := \
msm_perf.o \
msm_rd.o \
msm_ringbuffer.o \
-   msm_submitqueue.o
+   msm_submitqueue.o \
+   msm_sched.o
 
 msm-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o
 
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index b2da1fb..e461a9c 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -213,8 +213,7 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct 
drm_device *dev,
 int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv);
 int msm_gem_sync_object(struct drm_gem_object *obj,
struct msm_fence_context *fctx, bool exclusive);
-void msm_gem_move_to_active(struct drm_gem_object *obj,
-   struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence);
+void msm_gem_move_to_active(struct drm_gem_object *obj, struct msm_gpu *gpu);
 void msm_gem_move_to_inactive(struct drm_gem_object *obj);
 int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t 
*timeout);
 int msm_gem_cpu_fini(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index f583bb4..7a12f30 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -663,16 +663,12 @@ int msm_gem_sync_object(struct drm_gem_object *obj,
return 0;
 }
 
-void msm_gem_move_to_active(struct drm_gem_object *obj,
-   struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence)
+void msm_gem_move_to_active(struct drm_gem_object *obj, struct msm_gpu *gpu)
 {
struct msm_gem_object *msm_obj = to_msm_bo(obj);
WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
msm_obj->gpu = gpu;
-   if (exclusive)
-   reservation_object_add_excl_fence(msm_obj->resv, fence);
-   else
-   reservation_object_add_shared_fence(msm_obj->resv, fence);
+
list_del_init(_obj->mm_list);
list_add_tail(_obj->mm_list, >active_list);
 }
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index cae3aa5..8c6269f 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -20,6 +20,7 @@
 
 #include 
 #include 
+#include 
 #include "msm_drv.h"
 
 /* Additional internal-use only BO flags: */
@@ -136,6 +137,7 @@ enum msm_gem_lock {
  * lasts for the duration of the submit-ioctl.
  */
 struct msm_gem_submit {
+   struct drm_sched_job sched_job;
struct drm_device *dev;
struct