Re: [Intel-gfx] [PATCH v6] drm/i915/vlv: WA for Turbo and RC6 to work together.

2014-05-13 Thread Jesse Barnes
On Sun, 30 Mar 2014 11:58:48 +0530
deepa...@linux.intel.com wrote:

 @@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
   bool rp_up_masked;
   bool rp_down_masked;
  
 + u32 ei_interrupt_count;
 +
   int last_adj;
   enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
  
 @@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
   /* gen6+ rps state */
   struct intel_gen6_power_mgmt rps;
  
 + /* rps wa up ei calculation */
 + struct intel_rps_ei_calc rps_up_ei;
 +
 + /* rps wa down ei calculation */
 + struct intel_rps_ei_calc rps_down_ei;
 +

I think Chris meant that these bits belonged in intel_gen6_power_mgmt
too.

Other than that it looks like Ville has given this his r-b so it ought
to be fine.

-- 
Jesse Barnes, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6] drm/i915/vlv: WA for Turbo and RC6 to work together.

2014-04-28 Thread deepak . s
From: Deepak S deepa...@linux.intel.com

With RC6 enabled, BYT has a HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide
on increasing/decreasing the freq. This logic monitors the C0
counters of the render/media power wells over the EI period and takes
the necessary action based on these values.

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Added frequency change acceleration support and removed kernel-doc
style comments. (Ville)

v6: Updated comment section and fixed w/a comment. (Ville)

Signed-off-by: Deepak S deepa...@linux.intel.com
Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |  15 +
 drivers/gpu/drm/i915/i915_irq.c | 133 +++-
 drivers/gpu/drm/i915/i915_reg.h |  11 
 drivers/gpu/drm/i915/intel_pm.c |  12 +++-
 4 files changed, 167 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6136aab..5251946 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -819,6 +819,12 @@ struct i915_suspend_saved_registers {
u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {
+   u32 cz_ts_ei;
+   u32 render_ei_c0;
+   u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
/* work and pm_iir are protected by dev_priv->irq_lock */
struct work_struct work;
@@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
u8 rp1_freq;/* less than RP0 power/freqency */
u8 rp0_freq;/* Non-overclocked max frequency. */
 
+   u32 ei_interrupt_count;
+
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1414,6 +1422,13 @@ struct drm_i915_private {
/* gen6+ rps state */
struct intel_gen6_power_mgmt rps;
 
+   /* rps wa up ei calculation */
+   struct intel_rps_ei_calc rps_up_ei;
+
+   /* rps wa down ei calculation */
+   struct intel_rps_ei_calc rps_down_ei;
+
+
/* ilk-only ips/rps state. Everything in here is protected by the global
 * mchdev_lock in intel_pm.c */
struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2446e61..7d2efc8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1116,6 +1116,131 @@ static void notify_ring(struct drm_device *dev,
i915_queue_hangcheck(dev);
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+   struct  intel_rps_ei_calc *rps_ei)
+{
+   u32 cz_ts, cz_freq_khz;
+   u32 render_count, media_count;
+   u32 elapsed_render, elapsed_media, elapsed_time;
+   u32 residency = 0;
+
+   cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+   cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv-mem_freq * 1000, 4);
+
+   render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+   media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+   if (rps_ei-cz_ts_ei == 0) {
+   rps_ei-cz_ts_ei = cz_ts;
+   rps_ei-render_ei_c0 = render_count;
+   rps_ei-media_ei_c0 = media_count;
+
+   return dev_priv-rps.cur_freq;
+   }
+
+   elapsed_time = cz_ts - rps_ei-cz_ts_ei;
+   rps_ei-cz_ts_ei = cz_ts;
+
+   elapsed_render = render_count - rps_ei-render_ei_c0;
+   rps_ei-render_ei_c0 = render_count;
+
+   elapsed_media = media_count - rps_ei-media_ei_c0;
+   rps_ei-media_ei_c0 = media_count;
+
+   /* Convert all the counters into common unit of milli sec */
+   elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+   elapsed_render /=  cz_freq_khz;
+   elapsed_media /= cz_freq_khz;
+
+   /*
+* Calculate overall C0 residency percentage
+* only if elapsed time is non zero
+*/
+   if (elapsed_time) {
+   residency =
+   ((max(elapsed_render, elapsed_media) * 100)
+   / elapsed_time);
+   }
+
+   return residency;
+}
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+   u32 residency_C0_up = 0, residency_C0_down = 0;
+   u8 new_delay, adj;
+
+   dev_priv-rps.ei_interrupt_count++;
+
+   WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock));
+
+
+   if (dev_priv-rps_up_ei.cz_ts_ei == 0) {
+   vlv_c0_residency(dev_priv, dev_priv-rps_up_ei);
+   vlv_c0_residency(dev_priv, dev_priv-rps_down_ei);
+   return dev_priv-rps.cur_freq;
+   }
+
+
+   /*
+* To down throttle, C0 residency should be less than down 

[Intel-gfx] [PATCH v6] drm/i915/vlv: WA for Turbo and RC6 to work together.

2014-03-30 Thread deepak . s
From: Deepak S deepa...@linux.intel.com

With RC6 enabled, BYT has a HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide
on increasing/decreasing the freq. This logic monitors the C0
counters of the render/media power wells over the EI period and takes
the necessary action based on these values.

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Added frequency change acceleration support and removed kernel-doc
style comments. (Ville)

v6: Updated comment section and fixed w/a comment. (Ville)

Signed-off-by: Deepak S deepa...@linux.intel.com
Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |  15 +
 drivers/gpu/drm/i915/i915_irq.c | 135 +++-
 drivers/gpu/drm/i915/i915_reg.h |  12 +++-
 drivers/gpu/drm/i915/intel_pm.c |  13 +++-
 4 files changed, 170 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c212f3..c48ea93 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -816,6 +816,12 @@ struct i915_suspend_saved_registers {
u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {
+   u32 cz_ts_ei;
+   u32 render_ei_c0;
+   u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
/* work and pm_iir are protected by dev_priv->irq_lock */
struct work_struct work;
@@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
bool rp_up_masked;
bool rp_down_masked;
 
+   u32 ei_interrupt_count;
+
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
/* gen6+ rps state */
struct intel_gen6_power_mgmt rps;
 
+   /* rps wa up ei calculation */
+   struct intel_rps_ei_calc rps_up_ei;
+
+   /* rps wa down ei calculation */
+   struct intel_rps_ei_calc rps_down_ei;
+
+
/* ilk-only ips/rps state. Everything in here is protected by the global
 * mchdev_lock in intel_pm.c */
struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 300f127..341843d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1121,6 +1121,132 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
}
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+   struct  intel_rps_ei_calc *rps_ei)
+{
+   u32 cz_ts, cz_freq_khz;
+   u32 render_count, media_count;
+   u32 elapsed_render, elapsed_media, elapsed_time;
+   u32 residency = 0;
+
+   cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+   cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv-mem_freq * 1000, 4);
+
+   render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+   media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+   if (rps_ei-cz_ts_ei == 0) {
+   rps_ei-cz_ts_ei = cz_ts;
+   rps_ei-render_ei_c0 = render_count;
+   rps_ei-media_ei_c0 = media_count;
+
+   return dev_priv-rps.cur_freq;
+   }
+
+   elapsed_time = cz_ts - rps_ei-cz_ts_ei;
+   rps_ei-cz_ts_ei = cz_ts;
+
+   elapsed_render = render_count - rps_ei-render_ei_c0;
+   rps_ei-render_ei_c0 = render_count;
+
+   elapsed_media = media_count - rps_ei-media_ei_c0;
+   rps_ei-media_ei_c0 = media_count;
+
+   /* Convert all the counters into common unit of milli sec */
+   elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+   elapsed_render /=  cz_freq_khz;
+   elapsed_media /= cz_freq_khz;
+
+   /*
+* Calculate overall C0 residency percentage
+* only if elapsed time is non zero
+*/
+   if (elapsed_time) {
+   residency =
+   ((max(elapsed_render, elapsed_media) * 100)
+   / elapsed_time);
+   }
+
+   return residency;
+}
+
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+   u32 residency_C0_up = 0, residency_C0_down = 0;
+   u8 new_delay, adj;
+
+   dev_priv-rps.ei_interrupt_count++;
+
+   WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock));
+
+
+   if (dev_priv-rps_up_ei.cz_ts_ei == 0) {
+   vlv_c0_residency(dev_priv, dev_priv-rps_up_ei);
+   vlv_c0_residency(dev_priv, dev_priv-rps_down_ei);
+   return dev_priv-rps.cur_freq;
+   }
+
+
+   /*
+* To down throttle, C0 residency should be less than down threshold
+* for continuous EI intervals. So calculate down EI counters
+*