[PATCH 4/4] drm/intel: Push get_scanout_position() timestamping into kms driver.

2013-10-30 Thread Mario Kleiner
Move the ktime_get() clock readouts and potential preempt_disable()
calls from drm core into kms driver to make it compatible with the
api changes in the drm core.

The intel-kms driver needs to take the uncore.lock inside
i915_get_crtc_scanoutpos() and intel_pipe_in_vblank().
This is incompatible with the preempt_disable() on a
PREEMPT_RT patched kernel, as regular spin locks must not
be taken within a preempt_disable'd section. Lock contention
on the uncore.lock also introduced too much uncertainty in vblank
timestamps.

Push the ktime_get() timestamping for scanoutpos queries and
potential preempt_disable_rt() into i915_get_crtc_scanoutpos(),
so these problems can be avoided:

1. First lock the uncore.lock (might sleep on a PREEMPT_RT kernel).
2. preempt_disable_rt() (will be added by the rt-linux folks).
3. ktime_get() a timestamp before scanout pos query.
4. Do all mmio reads as fast as possible without grabbing any new locks!
5. ktime_get() a post-query timestamp.
6. preempt_enable_rt()
7. Unlock the uncore.lock.

This reduces timestamp uncertainty on a low-end HP Atom Mini netbook
with Intel GMA-950 nicely:

Before: 3-8 usecs with spikes > 20 usecs, triggering query retries.
After : Typically 1 usec (98% of all samples), occasionally 2 usecs
(2% of all samples), with maximum of 3 usecs (a handful).

v2: Fix formatting of new multi-line code comments.

Signed-off-by: Mario Kleiner 
Reviewed-by: Ville Syrjälä
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/i915/i915_irq.c |   54 +++
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 156a1a4..7cafe64 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -599,35 +599,40 @@ static u32 gm45_get_vblank_counter(struct drm_device 
*dev, int pipe)
return I915_READ(reg);
 }

-static bool intel_pipe_in_vblank(struct drm_device *dev, enum pipe pipe)
+/* raw reads, only for fast reads of display block, no need for forcewake etc. 
*/
+#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + 
(reg__))
+#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + 
(reg__))
+
+static bool intel_pipe_in_vblank_locked(struct drm_device *dev, enum pipe pipe)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t status;
+   int reg;

if (IS_VALLEYVIEW(dev)) {
status = pipe == PIPE_A ?
I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT :
I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;

-   return I915_READ(VLV_ISR) & status;
+   reg = VLV_ISR;
} else if (IS_GEN2(dev)) {
status = pipe == PIPE_A ?
I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT :
I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;

-   return I915_READ16(ISR) & status;
+   reg = ISR;
} else if (INTEL_INFO(dev)->gen < 5) {
status = pipe == PIPE_A ?
I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT :
I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;

-   return I915_READ(ISR) & status;
+   reg = ISR;
} else if (INTEL_INFO(dev)->gen < 7) {
status = pipe == PIPE_A ?
DE_PIPEA_VBLANK :
DE_PIPEB_VBLANK;

-   return I915_READ(DEISR) & status;
+   reg = DEISR;
} else {
switch (pipe) {
default:
@@ -642,12 +647,17 @@ static bool intel_pipe_in_vblank(struct drm_device *dev, 
enum pipe pipe)
break;
}

-   return I915_READ(DEISR) & status;
+   reg = DEISR;
}
+
+   if (IS_GEN2(dev))
+   return __raw_i915_read16(dev_priv, reg) & status;
+   else
+   return __raw_i915_read32(dev_priv, reg) & status;
 }

 static int i915_get_crtc_scanoutpos(struct drm_device *dev, int pipe,
-int *vpos, int *hpos)
+int *vpos, int *hpos, ktime_t *stime, ktime_t 
*etime)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
@@ -657,6 +667,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, 
int pipe,
int vbl_start, vbl_end, htotal, vtotal;
bool in_vbl = true;
int ret = 0;
+   unsigned long irqflags;

if (!intel_crtc->active) {
DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled "
@@ -671,14 +682,27 @@ static int i915_get_crtc_scanoutpos(struct drm_device 
*dev, int pipe,

ret |= DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE;

+   /*
+* Lock uncore.lock, as we will do multiple timing critical raw
+* register reads, potentially with preemption 

[PATCH 4/4] drm/intel: Push get_scanout_position() timestamping into kms driver.

2013-10-26 Thread Mario Kleiner
Move the ktime_get() clock readouts and potential preempt_disable()
calls from drm core into kms driver to make it compatible with the
api changes in the drm core.

The intel-kms driver needs to take the uncore.lock inside
i915_get_crtc_scanoutpos() and intel_pipe_in_vblank().
This is incompatible with the preempt_disable() on a
PREEMPT_RT patched kernel, as regular spin locks must not
be taken within a preempt_disable'd section. Lock contention
on the uncore.lock also introduced too much uncertainty in vblank
timestamps.

Push the ktime_get() timestamping for scanoutpos queries and
potential preempt_disable_rt() into i915_get_crtc_scanoutpos(),
so these problems can be avoided:

1. First lock the uncore.lock (might sleep on a PREEMPT_RT kernel).
2. preempt_disable_rt() (will be added by the rt-linux folks).
3. ktime_get() a timestamp before scanout pos query.
4. Do all mmio reads as fast as possible without grabbing any new locks!
5. ktime_get() a post-query timestamp.
6. preempt_enable_rt()
7. Unlock the uncore.lock.

This reduces timestamp uncertainty on a low-end HP Atom Mini netbook
with Intel GMA-950 nicely:

Before: 3-8 usecs with spikes > 20 usecs, triggering query retries.
After : Typically 1 usec (98% of all samples), occasionally 2 usecs
(2% of all samples), with maximum of 3 usecs (a handful).

Signed-off-by: Mario Kleiner 
---
 drivers/gpu/drm/i915/i915_irq.c |   53 +++
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 156a1a4..a3e41d3 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -599,35 +599,40 @@ static u32 gm45_get_vblank_counter(struct drm_device 
*dev, int pipe)
return I915_READ(reg);
 }

-static bool intel_pipe_in_vblank(struct drm_device *dev, enum pipe pipe)
+/* raw reads, only for fast reads of display block, no need for forcewake etc. 
*/
+#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + 
(reg__))
+#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + 
(reg__))
+
+static bool intel_pipe_in_vblank_locked(struct drm_device *dev, enum pipe pipe)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t status;
+   int reg;

if (IS_VALLEYVIEW(dev)) {
status = pipe == PIPE_A ?
I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT :
I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;

-   return I915_READ(VLV_ISR) & status;
+   reg = VLV_ISR;
} else if (IS_GEN2(dev)) {
status = pipe == PIPE_A ?
I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT :
I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;

-   return I915_READ16(ISR) & status;
+   reg = ISR;
} else if (INTEL_INFO(dev)->gen < 5) {
status = pipe == PIPE_A ?
I915_DISPLAY_PIPE_A_VBLANK_INTERRUPT :
I915_DISPLAY_PIPE_B_VBLANK_INTERRUPT;

-   return I915_READ(ISR) & status;
+   reg = ISR;
} else if (INTEL_INFO(dev)->gen < 7) {
status = pipe == PIPE_A ?
DE_PIPEA_VBLANK :
DE_PIPEB_VBLANK;

-   return I915_READ(DEISR) & status;
+   reg = DEISR;
} else {
switch (pipe) {
default:
@@ -642,12 +647,17 @@ static bool intel_pipe_in_vblank(struct drm_device *dev, 
enum pipe pipe)
break;
}

-   return I915_READ(DEISR) & status;
+   reg = DEISR;
}
+
+   if (IS_GEN2(dev))
+   return __raw_i915_read16(dev_priv, reg) & status;
+   else
+   return __raw_i915_read32(dev_priv, reg) & status;
 }

 static int i915_get_crtc_scanoutpos(struct drm_device *dev, int pipe,
-int *vpos, int *hpos)
+int *vpos, int *hpos, ktime_t *stime, ktime_t 
*etime)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
@@ -657,6 +667,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, 
int pipe,
int vbl_start, vbl_end, htotal, vtotal;
bool in_vbl = true;
int ret = 0;
+   unsigned long irqflags;

if (!intel_crtc->active) {
DRM_DEBUG_DRIVER("trying to get scanoutpos for disabled "
@@ -671,14 +682,26 @@ static int i915_get_crtc_scanoutpos(struct drm_device 
*dev, int pipe,

ret |= DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE;

+   /* Lock uncore.lock, as we will do multiple timing critical raw
+* register reads, potentially with preemption disabled, so the
+* following code must not block on uncore.lock.
+*/
+