Re: [Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-14 Thread Daniel Vetter
On Mon, Apr 14, 2014 at 12:21:44PM +0800, Zhao Yakui wrote:
 V1-V2: Follow Daniel's comment and use the simple ping-pong mechanism.
 This is only to add the support of dual BSD rings on BDW GT3 machine.
 The further optimization will be considered in another patch set.
 
 The BDW GT3 has two independent BSD rings, which can be used to process the
 video commands. To be simpler, it is transparent to user-space driver/middle.
 Instead the kernel driver will decide which ring is to dispatch the BSD video
 command.
 
 As every BSD ring is powerful, it is enough to dispatch the BSD video command
 based on the drm fd. In such case it can play back video stream while encoding
 another video stream. The coarse ping-pong mechanism is used to determine
 which BSD ring is used to dispatch the BSD video command.
 
 Signed-off-by: Zhao Yakui yakui.z...@intel.com
 ---
  drivers/gpu/drm/i915/i915_dma.c|3 +++
  drivers/gpu/drm/i915/i915_drv.h|3 +++
  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   37 
 +++-
  3 files changed, 42 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
 index 0b38f88..4d27cf4 100644
 --- a/drivers/gpu/drm/i915/i915_dma.c
 +++ b/drivers/gpu/drm/i915/i915_dma.c
 @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
 long flags)
   spin_lock_init(dev_priv-backlight_lock);
   spin_lock_init(dev_priv-uncore.lock);
   spin_lock_init(dev_priv-mm.object_stat_lock);
 + atomic_set(dev_priv-bsd_cmd_counter, 0);
   mutex_init(dev_priv-dpio_lock);
   mutex_init(dev_priv-modeset_restore_lock);
  
 @@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, 
 struct drm_file *file)
  {
   struct drm_i915_file_private *file_priv = file-driver_priv;
  
 + if (file_priv  file_priv-bsd_ring)
 + file_priv-bsd_ring = NULL;
   kfree(file_priv);
  }
  
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index ac5598c3..68e8166 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -1466,6 +1466,8 @@ struct drm_i915_private {
   struct i915_dri1_state dri1;
   /* Old ums support infrastructure, same warning applies. */
   struct i915_ums_state ums;
 + /* the lock for dispatch video commands on two BSD rings */
 + atomic_t bsd_cmd_counter;

You're still using atomic_t for no real good reason.
gen8_dispatch_bsd_ring is always called with the dev-struct_mutex lock
held, so there's really no reason for it.
-Daniel

  };
  
  static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
 @@ -1673,6 +1675,7 @@ struct drm_i915_file_private {
  
   struct i915_hw_context *private_default_ctx;
   atomic_t rps_wait_boost;
 + struct  intel_ring_buffer *bsd_ring;
  };
  
  /*
 diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
 b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 index 341ec68..720ef17 100644
 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 @@ -999,6 +999,34 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
   return 0;
  }
  
 +/**
 + * Find one BSD ring to dispatch the corresponding BSD command.
 + * The Ring ID is returned.
 + */
 +static int gen8_dispatch_bsd_ring(struct drm_device *dev,
 +   struct drm_file *file)
 +{
 + struct drm_i915_private *dev_priv = dev-dev_private;
 + struct drm_i915_file_private *file_priv = file-driver_priv;
 +
 + /* Check whether the file_priv is using one ring */
 + if (file_priv-bsd_ring)
 + return file_priv-bsd_ring-id;
 + else {
 + /* If no, use the ping-pong mechanism to select one ring */
 + int counter, ring_id;
 + smp_mb__before_atomic_inc();
 + counter = atomic_inc_return(dev_priv-bsd_cmd_counter);
 + if (counter % 2 == 0)
 + ring_id = VCS;
 + else
 + ring_id = VCS2;
 +
 + file_priv-bsd_ring = dev_priv-ring[ring_id];
 + return ring_id;
 + }
 +}
 +
  static int
  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
  struct drm_file *file,
 @@ -1043,7 +1071,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
 *data,
  
   if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
   ring = dev_priv-ring[RCS];
 - else
 + else if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
 + if (HAS_BSD2(dev)) {
 + int ring_id;
 + ring_id = gen8_dispatch_bsd_ring(dev, file);
 + ring = dev_priv-ring[ring_id];
 + } else
 + ring = dev_priv-ring[VCS];
 + } else
   ring = dev_priv-ring[(args-flags  I915_EXEC_RING_MASK) - 1];
  
   if 

Re: [Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-14 Thread Zhao Yakui
On Mon, 2014-04-14 at 01:22 -0600, Daniel Vetter wrote:
 On Mon, Apr 14, 2014 at 12:21:44PM +0800, Zhao Yakui wrote:
  V1-V2: Follow Daniel's comment and use the simple ping-pong mechanism.
  This is only to add the support of dual BSD rings on BDW GT3 machine.
  The further optimization will be considered in another patch set.
  
  The BDW GT3 has two independent BSD rings, which can be used to process the
  video commands. To be simpler, it is transparent to user-space 
  driver/middle.
  Instead the kernel driver will decide which ring is to dispatch the BSD 
  video
  command.
  
  As every BSD ring is powerful, it is enough to dispatch the BSD video 
  command
  based on the drm fd. In such case it can play back video stream while 
  encoding
  another video stream. The coarse ping-pong mechanism is used to determine
  which BSD ring is used to dispatch the BSD video command.
  
  Signed-off-by: Zhao Yakui yakui.z...@intel.com
  ---
   drivers/gpu/drm/i915/i915_dma.c|3 +++
   drivers/gpu/drm/i915/i915_drv.h|3 +++
   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   37 
  +++-
   3 files changed, 42 insertions(+), 1 deletion(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_dma.c 
  b/drivers/gpu/drm/i915/i915_dma.c
  index 0b38f88..4d27cf4 100644
  --- a/drivers/gpu/drm/i915/i915_dma.c
  +++ b/drivers/gpu/drm/i915/i915_dma.c
  @@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
  long flags)
  spin_lock_init(dev_priv-backlight_lock);
  spin_lock_init(dev_priv-uncore.lock);
  spin_lock_init(dev_priv-mm.object_stat_lock);
  +   atomic_set(dev_priv-bsd_cmd_counter, 0);
  mutex_init(dev_priv-dpio_lock);
  mutex_init(dev_priv-modeset_restore_lock);
   
  @@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, 
  struct drm_file *file)
   {
  struct drm_i915_file_private *file_priv = file-driver_priv;
   
  +   if (file_priv  file_priv-bsd_ring)
  +   file_priv-bsd_ring = NULL;
  kfree(file_priv);
   }
   
  diff --git a/drivers/gpu/drm/i915/i915_drv.h 
  b/drivers/gpu/drm/i915/i915_drv.h
  index ac5598c3..68e8166 100644
  --- a/drivers/gpu/drm/i915/i915_drv.h
  +++ b/drivers/gpu/drm/i915/i915_drv.h
  @@ -1466,6 +1466,8 @@ struct drm_i915_private {
  struct i915_dri1_state dri1;
  /* Old ums support infrastructure, same warning applies. */
  struct i915_ums_state ums;
  +   /* the lock for dispatch video commands on two BSD rings */
  +   atomic_t bsd_cmd_counter;
 
 You're still using atomic_t for no real good reason.
 gen8_dispatch_bsd_ring is always called with the dev-struct_mutex lock
 held, so there's really no reason for it.

If the struct_mutex is used in the gen8_dispatch_bsd_ring, I can remove
the atomic_t. 
It seems that the struct_mutex is a big lock and it is used very
frequently(i915_gem.c, i915_dma.c and so on). In my point it is a little
heavier than the atomic_t if one counter is increased and returned. 

If you think that the mutex is better than atomic, I will follow your
advice.

Thanks.
Yakui

 -Daniel
 
   };
   
   static inline struct drm_i915_private *to_i915(const struct drm_device 
  *dev)
  @@ -1673,6 +1675,7 @@ struct drm_i915_file_private {
   
  struct i915_hw_context *private_default_ctx;
  atomic_t rps_wait_boost;
  +   struct  intel_ring_buffer *bsd_ring;
   };
   
   /*
  diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
  b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  index 341ec68..720ef17 100644
  --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
  @@ -999,6 +999,34 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
  return 0;
   }
   
  +/**
  + * Find one BSD ring to dispatch the corresponding BSD command.
  + * The Ring ID is returned.
  + */
  +static int gen8_dispatch_bsd_ring(struct drm_device *dev,
  + struct drm_file *file)
  +{
  +   struct drm_i915_private *dev_priv = dev-dev_private;
  +   struct drm_i915_file_private *file_priv = file-driver_priv;
  +
  +   /* Check whether the file_priv is using one ring */
  +   if (file_priv-bsd_ring)
  +   return file_priv-bsd_ring-id;
  +   else {
  +   /* If no, use the ping-pong mechanism to select one ring */
  +   int counter, ring_id;
  +   smp_mb__before_atomic_inc();
  +   counter = atomic_inc_return(dev_priv-bsd_cmd_counter);
  +   if (counter % 2 == 0)
  +   ring_id = VCS;
  +   else
  +   ring_id = VCS2;
  +
  +   file_priv-bsd_ring = dev_priv-ring[ring_id];
  +   return ring_id;
  +   }
  +}
  +
   static int
   i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 struct drm_file *file,
  @@ -1043,7 +1071,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
  *data,
   
  if ((args-flags  I915_EXEC_RING_MASK) == 

Re: [Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-14 Thread Chris Wilson
On Mon, Apr 14, 2014 at 04:05:19PM +0800, Zhao Yakui wrote:
 On Mon, 2014-04-14 at 01:22 -0600, Daniel Vetter wrote:
  You're still using atomic_t for no real good reason.
  gen8_dispatch_bsd_ring is always called with the dev-struct_mutex lock
  held, so there's really no reason for it.
 
 If the struct_mutex is used in the gen8_dispatch_bsd_ring, I can remove
 the atomic_t. 
 It seems that the struct_mutex is a big lock and it is used very
 frequently(i915_gem.c, i915_dma.c and so on). In my point it is a little
 heavier than the atomic_t if one counter is increased and returned. 
 
 If you think that the mutex is better than atomic, I will follow your
 advice.

You are already holding the struct_mutex whenever we touch the ring and
execbuffer. Even in a fine-grained world, there will still be a mutex
around all operations that touch the rings.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-14 Thread Zhao Yakui
On Mon, 2014-04-14 at 02:19 -0600, Chris Wilson wrote:
 On Mon, Apr 14, 2014 at 04:05:19PM +0800, Zhao Yakui wrote:
  On Mon, 2014-04-14 at 01:22 -0600, Daniel Vetter wrote:
   You're still using atomic_t for no real good reason.
   gen8_dispatch_bsd_ring is always called with the dev-struct_mutex lock
   held, so there's really no reason for it.
  
  If the struct_mutex is used in the gen8_dispatch_bsd_ring, I can remove
  the atomic_t. 
  It seems that the struct_mutex is a big lock and it is used very
  frequently(i915_gem.c, i915_dma.c and so on). In my point it is a little
  heavier than the atomic_t if one counter is increased and returned. 
  
  If you think that the mutex is better than atomic, I will follow your
  advice.
 
 You are already holding the struct_mutex whenever we touch the ring and
 execbuffer. Even in a fine-grained world, there will still be a mutex
 around all operations that touch the rings.

Hi, Chris

I understand your concern. From the source code the struct_mutex
will be held when trying to do the buffer relocation and dispatch the
command in one ring. 
But my code is only to select one BSD ring. In such case the
atomic_t usage is enough and it is unnecessary to hold the struct_mutex.
If you also think that the struct_mutex is better, I can update the
code to use the struct_mutex.

Thanks.
Yakui


 -Chris
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH V2 6/6] drm/i915:Use the coarse ping-pong mechanism based on drm fd to dispatch the BSD command on BDW GT3

2014-04-13 Thread Zhao Yakui
V1-V2: Follow Daniel's comment and use the simple ping-pong mechanism.
This is only to add the support of dual BSD rings on BDW GT3 machine.
The further optimization will be considered in another patch set.

The BDW GT3 has two independent BSD rings, which can be used to process the
video commands. To be simpler, it is transparent to user-space driver/middle.
Instead the kernel driver will decide which ring is to dispatch the BSD video
command.

As every BSD ring is powerful, it is enough to dispatch the BSD video command
based on the drm fd. In such case it can play back video stream while encoding
another video stream. The coarse ping-pong mechanism is used to determine
which BSD ring is used to dispatch the BSD video command.

Signed-off-by: Zhao Yakui yakui.z...@intel.com
---
 drivers/gpu/drm/i915/i915_dma.c|3 +++
 drivers/gpu/drm/i915/i915_drv.h|3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   37 +++-
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0b38f88..4d27cf4 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1572,6 +1572,7 @@ int i915_driver_load(struct drm_device *dev, unsigned 
long flags)
spin_lock_init(dev_priv-backlight_lock);
spin_lock_init(dev_priv-uncore.lock);
spin_lock_init(dev_priv-mm.object_stat_lock);
+   atomic_set(dev_priv-bsd_cmd_counter, 0);
mutex_init(dev_priv-dpio_lock);
mutex_init(dev_priv-modeset_restore_lock);
 
@@ -1929,6 +1930,8 @@ void i915_driver_postclose(struct drm_device *dev, struct 
drm_file *file)
 {
struct drm_i915_file_private *file_priv = file-driver_priv;
 
+   if (file_priv  file_priv-bsd_ring)
+   file_priv-bsd_ring = NULL;
kfree(file_priv);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ac5598c3..68e8166 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1466,6 +1466,8 @@ struct drm_i915_private {
struct i915_dri1_state dri1;
/* Old ums support infrastructure, same warning applies. */
struct i915_ums_state ums;
+   /* the lock for dispatch video commands on two BSD rings */
+   atomic_t bsd_cmd_counter;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -1673,6 +1675,7 @@ struct drm_i915_file_private {
 
struct i915_hw_context *private_default_ctx;
atomic_t rps_wait_boost;
+   struct  intel_ring_buffer *bsd_ring;
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 341ec68..720ef17 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -999,6 +999,34 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
 }
 
+/**
+ * Find one BSD ring to dispatch the corresponding BSD command.
+ * The Ring ID is returned.
+ */
+static int gen8_dispatch_bsd_ring(struct drm_device *dev,
+ struct drm_file *file)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_i915_file_private *file_priv = file-driver_priv;
+
+   /* Check whether the file_priv is using one ring */
+   if (file_priv-bsd_ring)
+   return file_priv-bsd_ring-id;
+   else {
+   /* If no, use the ping-pong mechanism to select one ring */
+   int counter, ring_id;
+   smp_mb__before_atomic_inc();
+   counter = atomic_inc_return(dev_priv-bsd_cmd_counter);
+   if (counter % 2 == 0)
+   ring_id = VCS;
+   else
+   ring_id = VCS2;
+
+   file_priv-bsd_ring = dev_priv-ring[ring_id];
+   return ring_id;
+   }
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
   struct drm_file *file,
@@ -1043,7 +1071,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
 
if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
ring = dev_priv-ring[RCS];
-   else
+   else if ((args-flags  I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
+   if (HAS_BSD2(dev)) {
+   int ring_id;
+   ring_id = gen8_dispatch_bsd_ring(dev, file);
+   ring = dev_priv-ring[ring_id];
+   } else
+   ring = dev_priv-ring[VCS];
+   } else
ring = dev_priv-ring[(args-flags  I915_EXEC_RING_MASK) - 1];
 
if (!intel_ring_initialized(ring)) {
-- 
1.7.10.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx