[Intel-gfx] [PATCH] drm/i915: bdw: fix RC6 enabled status reporting and disable runtime PM

2014-04-28 Thread Imre Deak
On BDW we don't enable RC6 at the moment, but this isn't reflected in
the (sanitized) i915.enable_rc6 option. So make enable_rc6 report
correctly that RC6 is disabled, which will also effectively disable RPM
on BDW (since RPM depends on RC6).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77565

Signed-off-by: Imre Deak imre.d...@intel.com
---
 drivers/gpu/drm/i915/intel_pm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d49ec02..19020e5 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3260,6 +3260,10 @@ static int sanitize_rc6_option(const struct drm_device 
*dev, int enable_rc6)
if (INTEL_INFO(dev)-gen == 5  !IS_IRONLAKE_M(dev))
return 0;
 
+   /* Disable RC6 on Broadwell for now */
+   if (IS_BROADWELL(dev))
+   return 0;
+
/* Respect the kernel parameter if it is set */
if (enable_rc6 = 0) {
int mask;
-- 
1.8.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC] tests/gem_bo_falloc: New igt for testing gem_fallocate() ioctl

2014-04-28 Thread arun . siluvery
From: Siluvery, Arun arun.siluv...@intel.com

This ioctl allows varying the effective size of the gem object.
The user can mark a certain range in the object space as scratch, thus
effectively modifying the size used.

v2: modify subtest names and function names as per tooling convention.

Signed-off-by: Siluvery, Arun arun.siluv...@intel.com
---
 tests/Makefile.sources |   1 +
 tests/gem_bo_falloc.c  | 471 +
 2 files changed, 472 insertions(+)
 create mode 100644 tests/gem_bo_falloc.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 88866ac..25c010e 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -120,6 +120,7 @@ TESTS_progs = \
gem_unref_active_buffers \
gem_vmap_blits \
gem_wait_render_timeout \
+   gem_bo_falloc \
gen3_mixed_blits \
gen3_render_linear_blits \
gen3_render_mixed_blits \
diff --git a/tests/gem_bo_falloc.c b/tests/gem_bo_falloc.c
new file mode 100644
index 000..d6b7f10
--- /dev/null
+++ b/tests/gem_bo_falloc.c
@@ -0,0 +1,471 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ *
+ */
+
+#include unistd.h
+#include stdlib.h
+#include stdio.h
+#include string.h
+#include fcntl.h
+#include inttypes.h
+#include errno.h
+#include sys/stat.h
+#include sys/ioctl.h
+#include drm.h
+#include i915_drm.h
+#include drmtest.h
+#include intel_chipset.h
+#include intel_gpu_tools.h
+
+#define OBJECT_SIZE (8 * PAGE_SIZE)
+#define COPY_BLT_CMD   (229|0x5322|0x6)
+#define BLT_WRITE_ALPHA(121)
+#define BLT_WRITE_RGB  (120)
+#define BLT_SRC_TILED  (115)
+#define BLT_DST_TILED  (111)
+
+static uint8_t buf[OBJECT_SIZE];
+
+static uint32_t create_bo(int fd)
+{
+   int i;
+   uint32_t page_count;
+   uint32_t handle;
+
+   handle = gem_create(fd, sizeof(buf));
+   page_count = sizeof(buf) / PAGE_SIZE;
+
+   for (i = 0; i  page_count; ++i)
+   memset(buf + (i * PAGE_SIZE), i+1, PAGE_SIZE);
+
+   gem_write(fd, handle, 0, buf, sizeof(buf));
+   return handle;
+}
+
+static int gem_linear_blt(int fd,
+ uint32_t *batch,
+ uint32_t src,
+ uint32_t dst,
+ uint32_t length,
+ struct drm_i915_gem_relocation_entry *reloc)
+{
+   uint32_t *b = batch;
+   int height = length / (16 * 1024);
+
+   igt_assert(height = 116);
+
+   if (height) {
+   int i = 0;
+   b[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+   if (intel_gen(intel_get_drm_devid(fd)) = 8)
+   b[i-1]+=2;
+   b[i++] = 0xcc  16 | 1  25 | 1  24 | (16*1024);
+   b[i++] = 0;
+   b[i++] = height  16 | (4*1024);
+   b[i++] = 0;
+   reloc-offset = (b-batch+4) * sizeof(uint32_t);
+   reloc-delta = 0;
+   reloc-target_handle = dst;
+   reloc-read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc-write_domain = I915_GEM_DOMAIN_RENDER;
+   reloc-presumed_offset = 0;
+   reloc++;
+   if (intel_gen(intel_get_drm_devid(fd)) = 8)
+   b[i++] = 0; /* FIXME */
+
+   b[i++] = 0;
+   b[i++] = 16*1024;
+   b[i++] = 0;
+   reloc-offset = (b-batch+7) * sizeof(uint32_t);
+   if (intel_gen(intel_get_drm_devid(fd)) = 8)
+   reloc-offset += sizeof(uint32_t);
+   reloc-delta = 0;
+   reloc-target_handle = src;
+   reloc-read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc-write_domain = 0;
+   reloc-presumed_offset = 0;
+   reloc++;
+   if 

[Intel-gfx] [PATCH for stable 3.14 only 1/1] drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE

2014-04-28 Thread Jani Nikula
This reverts the bisected regressing

commit bc0bb9fd1c7810407ab810d204bbaecb255fddde
Author: Jani Nikula jani.nik...@intel.com
Date:   Thu Nov 14 12:14:29 2013 +0200

drm/i915: remove QUIRK_NO_PCH_PWM_ENABLE

restoring QUIRK_NO_PCH_PWM_ENABLE for a couple of Dell XPS models which
broke in 3.14.

There is no such revert upstream. We have root caused and fixed the
issue upstream, without the quirk, with:

commit 39fbc9c8f6765959b55e0b127dd5c57df5a47d67
Author: Jani Nikula jani.nik...@intel.com
Date:   Wed Apr 9 11:22:06 2014 +0300

drm/i915: check VBT for supported backlight type

and

commit c675949ec58ca50d5a3ae3c757892f1560f6e896
Author: Jani Nikula jani.nik...@intel.com
Date:   Wed Apr 9 11:31:37 2014 +0300

drm/i915: do not setup backlight if not available according to VBT

While the commits are within the stable rules otherwise, and fix more
machines than just the regressed Dell XPS models, we feel backporting
them to stable may be too risky. The revert is limited to the broken
machines, and the impact should be effectively the same as what the
upstream commits do more generally.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76276
Reported-by: Romain Francoise rom...@orebokech.com
CC: Kamal Mostafa ka...@canonical.com
CC: Daniel Vetter dan...@ffwll.ch
CC: sta...@vger.kernel.org (3.14 only)
Signed-off-by: Jani Nikula jani.nik...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/intel_display.c | 16 
 drivers/gpu/drm/i915/intel_panel.c   |  4 
 3 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index df77e20e3c3d..697f2150a997 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -734,6 +734,7 @@ enum intel_sbi_destination {
 #define QUIRK_PIPEA_FORCE (10)
 #define QUIRK_LVDS_SSC_DISABLE (11)
 #define QUIRK_INVERT_BRIGHTNESS (12)
+#define QUIRK_NO_PCH_PWM_ENABLE (13)
 
 struct intel_fbdev;
 struct intel_fbc_work;
diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 9b8a7c7ea7fc..963639d9049b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -10771,6 +10771,17 @@ static void quirk_invert_brightness(struct drm_device 
*dev)
DRM_INFO(applying inverted panel brightness quirk\n);
 }
 
+/*
+ * Some machines (Dell XPS13) suffer broken backlight controls if
+ * BLM_PCH_PWM_ENABLE is set.
+ */
+static void quirk_no_pcm_pwm_enable(struct drm_device *dev)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   dev_priv-quirks |= QUIRK_NO_PCH_PWM_ENABLE;
+   DRM_INFO(applying no-PCH_PWM_ENABLE quirk\n);
+}
+
 struct intel_quirk {
int device;
int subsystem_vendor;
@@ -10839,6 +10850,11 @@ static struct intel_quirk intel_quirks[] = {
 
/* Acer Aspire 4736Z */
{ 0x2a42, 0x1025, 0x0260, quirk_invert_brightness },
+
+   /* Dell XPS13 HD Sandy Bridge */
+   { 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable },
+   /* Dell XPS13 HD and XPS13 FHD Ivy Bridge */
+   { 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable },
 };
 
 static void intel_init_quirks(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_panel.c 
b/drivers/gpu/drm/i915/intel_panel.c
index 079ea38f14d9..9f1d7a9300e8 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -671,6 +671,10 @@ static void pch_enable_backlight(struct intel_connector 
*connector)
pch_ctl2 = panel-backlight.max  16;
I915_WRITE(BLC_PWM_PCH_CTL2, pch_ctl2);
 
+   /* XXX: transitional */
+   if (dev_priv-quirks  QUIRK_NO_PCH_PWM_ENABLE)
+   return;
+
pch_ctl1 = 0;
if (panel-backlight.active_low_pwm)
pch_ctl1 |= BLM_PCH_POLARITY;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH for stable 3.14 only 0/1] drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE

2014-04-28 Thread Jani Nikula
Stable team -

I'd like to hear your opinions on this one. It reverts a commit that
regressed in 3.14, but the revert does not exist upstream. Instead we've
root caused the issue and provided a real fix for upstream, but we're
hesitant to backport that to stable. Functionally the effect of the
revert is similar to the real fix, but only impacts a few models, while
the real fix has much broader scope. See the commit message for details.

Romain, Kamal, I'd appreciate it if you could provide your tested-by
with this on top of 3.14.2 on the failing Dell XPS models. Thanks.

BR,
Jani.


Jani Nikula (1):
  drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE

 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/intel_display.c | 16 
 drivers/gpu/drm/i915/intel_panel.c   |  4 
 3 files changed, 21 insertions(+)

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH for stable 3.14 only 1/1] drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE

2014-04-28 Thread Romain Francoise
Jani Nikula jani.nik...@intel.com writes:

 This reverts the bisected regressing

 commit bc0bb9fd1c7810407ab810d204bbaecb255fddde
 Author: Jani Nikula jani.nik...@intel.com
 Date:   Thu Nov 14 12:14:29 2013 +0200

 drm/i915: remove QUIRK_NO_PCH_PWM_ENABLE

 restoring QUIRK_NO_PCH_PWM_ENABLE for a couple of Dell XPS models which
 broke in 3.14.

I've been running with this revert since v3.14-rc (and now v3.14.2), so:

Tested-by: Romain Francoise rom...@orebokech.com
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 49/71] drm/i915/chv: Add CHV display support

2014-04-28 Thread ville . syrjala
From: Rafael Barbalho rafael.barba...@intel.com

Add support for the third pipe in cherryview

v2: Don't use spaces for indentation (Jani)
Wrap long lines

Reviewed-by: Imre Deak imre.d...@intel.com
Signed-off-by: Rafael Barbalho rafael.barba...@intel.com
[vsyrjala: slightly massaged the patch]
Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.c | 12 
 drivers/gpu/drm/i915/i915_reg.h | 11 ---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3f57237..0fd3046 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -49,6 +49,17 @@ static struct drm_driver driver;
.dpll_md_offsets = { DPLL_A_MD_OFFSET, DPLL_B_MD_OFFSET }, \
.palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET }
 
+#define GEN_CHV_PIPEOFFSETS \
+   .pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \
+ CHV_PIPE_C_OFFSET }, \
+   .trans_offsets = { TRANSCODER_A_OFFSET, TRANSCODER_B_OFFSET, \
+  CHV_TRANSCODER_C_OFFSET, }, \
+   .dpll_offsets = { DPLL_A_OFFSET, DPLL_B_OFFSET, \
+ CHV_DPLL_C_OFFSET }, \
+   .dpll_md_offsets = { DPLL_A_MD_OFFSET, DPLL_B_MD_OFFSET, \
+CHV_DPLL_C_MD_OFFSET }, \
+   .palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET, \
+CHV_PALETTE_C_OFFSET }
 
 static const struct intel_device_info intel_i830_info = {
.gen = 2, .is_mobile = 1, .cursor_needs_physical = 1, .num_pipes = 2,
@@ -286,6 +297,7 @@ static const struct intel_device_info intel_cherryview_info 
= {
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
.is_valleyview = 1,
.display_mmio_offset = VLV_DISPLAY_BASE,
+   GEN_CHV_PIPEOFFSETS,
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 74ac1c2..9138eff 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1440,6 +1440,7 @@ enum punit_power_well {
  */
 #define DPLL_A_OFFSET 0x6014
 #define DPLL_B_OFFSET 0x6018
+#define CHV_DPLL_C_OFFSET 0x6030
 #define DPLL(pipe) (dev_priv-info.dpll_offsets[pipe] + \
dev_priv-info.display_mmio_offset)
 
@@ -1531,6 +1532,7 @@ enum punit_power_well {
 
 #define DPLL_A_MD_OFFSET 0x601c /* 965+ only */
 #define DPLL_B_MD_OFFSET 0x6020 /* 965+ only */
+#define CHV_DPLL_C_MD_OFFSET 0x603c
 #define DPLL_MD(pipe) (dev_priv-info.dpll_md_offsets[pipe] + \
   dev_priv-info.display_mmio_offset)
 
@@ -1727,6 +1729,7 @@ enum punit_power_well {
  */
 #define PALETTE_A_OFFSET 0xa000
 #define PALETTE_B_OFFSET 0xa800
+#define CHV_PALETTE_C_OFFSET 0xc000
 #define PALETTE(pipe) (dev_priv-info.palette_offsets[pipe] + \
   dev_priv-info.display_mmio_offset)
 
@@ -2216,6 +2219,7 @@ enum punit_power_well {
 #define TRANSCODER_A_OFFSET 0x6
 #define TRANSCODER_B_OFFSET 0x61000
 #define TRANSCODER_C_OFFSET 0x62000
+#define CHV_TRANSCODER_C_OFFSET 0x63000
 #define TRANSCODER_EDP_OFFSET 0x6f000
 
 #define _TRANSCODER2(pipe, reg) (dev_priv-info.trans_offsets[(pipe)] - \
@@ -3543,9 +3547,10 @@ enum punit_power_well {
 #define PIPESTAT_INT_ENABLE_MASK   0x7fff
 #define PIPESTAT_INT_STATUS_MASK   0x
 
-#define PIPE_A_OFFSET  0x7
-#define PIPE_B_OFFSET  0x71000
-#define PIPE_C_OFFSET  0x72000
+#define PIPE_A_OFFSET  0x7
+#define PIPE_B_OFFSET  0x71000
+#define PIPE_C_OFFSET  0x72000
+#define CHV_PIPE_C_OFFSET  0x74000
 /*
  * There's actually no pipe EDP. Some pipe registers have
  * simply shifted from the pipe to the transcoder, while
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 53/71] drm/i915/chv: Configure crtc_mask correctly for CHV

2014-04-28 Thread ville . syrjala
From: Ville Syrjälä ville.syrj...@linux.intel.com

On CHV pipe C can drive only port D, and pipes A and B can drive only
ports B and C. Configure the crtc_mask appropriately to reflect that.

v2: Moar braces (Jani)

Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/intel_dp.c   | 9 -
 drivers/gpu/drm/i915/intel_hdmi.c | 9 -
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 27e0c86..a3cb9d8 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4079,7 +4079,14 @@ intel_dp_init(struct drm_device *dev, int output_reg, 
enum port port)
intel_dig_port-dp.output_reg = output_reg;
 
intel_encoder-type = INTEL_OUTPUT_DISPLAYPORT;
-   intel_encoder-crtc_mask = (1  0) | (1  1) | (1  2);
+   if (IS_CHERRYVIEW(dev)) {
+   if (port == PORT_D)
+   intel_encoder-crtc_mask = 1  2;
+   else
+   intel_encoder-crtc_mask = (1  0) | (1  1);
+   } else {
+   intel_encoder-crtc_mask = (1  0) | (1  1) | (1  2);
+   }
intel_encoder-cloneable = 0;
intel_encoder-hot_plug = intel_dp_hot_plug;
 
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c 
b/drivers/gpu/drm/i915/intel_hdmi.c
index 1e8d2a9..d4e020e 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1452,7 +1452,14 @@ void intel_hdmi_init(struct drm_device *dev, int 
hdmi_reg, enum port port)
}
 
intel_encoder-type = INTEL_OUTPUT_HDMI;
-   intel_encoder-crtc_mask = (1  0) | (1  1) | (1  2);
+   if (IS_CHERRYVIEW(dev)) {
+   if (port == PORT_D)
+   intel_encoder-crtc_mask = 1  2;
+   else
+   intel_encoder-crtc_mask = (1  0) | (1  1);
+   } else {
+   intel_encoder-crtc_mask = (1  0) | (1  1) | (1  2);
+   }
intel_encoder-cloneable = 1  INTEL_OUTPUT_ANALOG;
/*
 * BSpec is unclear about HDMI+HDMI cloning on g4x, but it seems
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 63/71] drm/i915/chv: Set soft reset override bit for data lane resets

2014-04-28 Thread ville . syrjala
From: Ville Syrjälä ville.syrj...@linux.intel.com

The bits we've been setting so far only propagate the reset signal to
the data lanes. To actually force the reset signal we need to set another
override bit.

v2: Fix misplaced ';' (Mika)

Reviewed-by: Mika Kuoppala mika.kuopp...@intel.com
Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_reg.h   | 1 +
 drivers/gpu/drm/i915/intel_dp.c   | 8 
 drivers/gpu/drm/i915/intel_hdmi.c | 8 
 3 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 206d600..3c2c8b1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -665,6 +665,7 @@ enum punit_power_well {
 
 #define _VLV_PCS_DW1_CH0   0x8204
 #define _VLV_PCS_DW1_CH1   0x8404
+#define   CHV_PCS_REQ_SOFTRESET_EN (123)
 #define   DPIO_PCS_CLK_CRI_RXEB_EIOS_EN(122)
 #define   DPIO_PCS_CLK_CRI_RXDIGFILTSG_EN (121)
 #define   DPIO_PCS_CLK_DATAWIDTH_SHIFT (6)
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 23a8b21..811e1e8 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1854,6 +1854,10 @@ static void chv_post_disable_dp(struct intel_encoder 
*encoder)
mutex_lock(dev_priv-dpio_lock);
 
/* Propagate soft reset to data lane reset */
+   val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW1(ch));
+   val |= CHV_PCS_REQ_SOFTRESET_EN;
+   vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(ch), val);
+
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW0(ch));
val = ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(ch), val);
@@ -1988,6 +1992,10 @@ static void chv_pre_enable_dp(struct intel_encoder 
*encoder)
mutex_lock(dev_priv-dpio_lock);
 
/* Deassert soft data lane reset*/
+   val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW1(ch));
+   val |= CHV_PCS_REQ_SOFTRESET_EN;
+   vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(ch), val);
+
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW0(ch));
val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(ch), val);
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c 
b/drivers/gpu/drm/i915/intel_hdmi.c
index 6d86bde..e04b1ae 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1241,6 +1241,10 @@ static void chv_hdmi_post_disable(struct intel_encoder 
*encoder)
mutex_lock(dev_priv-dpio_lock);
 
/* Propagate soft reset to data lane reset */
+   val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW1(ch));
+   val |= CHV_PCS_REQ_SOFTRESET_EN;
+   vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(ch), val);
+
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW0(ch));
val = ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(ch), val);
@@ -1263,6 +1267,10 @@ static void chv_hdmi_pre_enable(struct intel_encoder 
*encoder)
mutex_lock(dev_priv-dpio_lock);
 
/* Deassert soft data lane reset*/
+   val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW1(ch));
+   val |= CHV_PCS_REQ_SOFTRESET_EN;
+   vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(ch), val);
+
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW0(ch));
val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(ch), val);
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 41/71] drm/i915/chv: Add some workaround notes

2014-04-28 Thread Ville Syrjälä
On Fri, Apr 25, 2014 at 05:43:55PM -0300, Paulo Zanoni wrote:
 2014-04-09 7:28 GMT-03:00  ville.syrj...@linux.intel.com:
  From: Ville Syrjälä ville.syrj...@linux.intel.com
 
  We implement the following workarounds:
  * WaDisableAsyncFlipPerfMode:chv
  * WaDisableSemaphoreAndSyncFlipWait:chv (at least partially)
 
 In the rebased version (on your gitorious tree, chv_rebase branch),
 the chunk for this WA got removed. I don't know if this was an
 accident or not. We need to, at least, fix the commit message.

Yeah I misread the spec and thought that the idle msg disable bit is
there for all rings. But after rechecking I noticed that it was only
valid for the render ring.

I'll resend this patch with the WaDisableSemaphoreAndSyncFlipWait
comment dropped.

 
 
  * WaProgramMiArbOnOffAroundMiSetContext:chv
 
  Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
  ---
   drivers/gpu/drm/i915/i915_gem.c | 1 +
   drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
   drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
   3 files changed, 3 insertions(+), 2 deletions(-)
 
  diff --git a/drivers/gpu/drm/i915/i915_gem.c 
  b/drivers/gpu/drm/i915/i915_gem.c
  index 84a7171..a9c33ec 100644
  --- a/drivers/gpu/drm/i915/i915_gem.c
  +++ b/drivers/gpu/drm/i915/i915_gem.c
  @@ -4376,6 +4376,7 @@ static int i915_gem_init_rings(struct drm_device *dev)
  struct intel_ring_buffer *ring;
  int i;
 
  +   /* WaDisableSemaphoreAndSyncFlipWait:chv */
  for_each_ring(ring, dev_priv, i)
  I915_WRITE(RING_RC_PSMI_CONTROL(ring),
 
  _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
  diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
  b/drivers/gpu/drm/i915/i915_gem_context.c
  index 28a2b15..142df90 100644
  --- a/drivers/gpu/drm/i915/i915_gem_context.c
  +++ b/drivers/gpu/drm/i915/i915_gem_context.c
  @@ -606,7 +606,7 @@ mi_set_context(struct intel_ring_buffer *ring,
  if (ret)
  return ret;
 
  -   /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw */
  +   /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
 
 Do we really need this WA for BDW and CHV? I couldn't find them on my
 docs for gen8...

It's listed in bspec.

 
 Thanks,
 Paulo
 
 
  if (INTEL_INFO(ring-dev)-gen = 7)
  intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
  else
  diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
  b/drivers/gpu/drm/i915/intel_ringbuffer.c
  index 913b8ab..24022c5 100644
  --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
  +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
  @@ -581,7 +581,7 @@ static int init_render_ring(struct intel_ring_buffer 
  *ring)
   * to use MI_WAIT_FOR_EVENT within the CS. It should already be
   * programmed to '1' on all products.
   *
  -* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw
  +* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
   */
  if (INTEL_INFO(dev)-gen = 6)
  I915_WRITE(MI_MODE, 
  _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
  --
  1.8.3.2
 
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 
 
 
 -- 
 Paulo Zanoni

-- 
Ville Syrjälä
Intel OTC
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 41/71] drm/i915/chv: Add some workaround notes

2014-04-28 Thread ville . syrjala
From: Ville Syrjälä ville.syrj...@linux.intel.com

We implement the following workarounds:
* WaDisableAsyncFlipPerfMode:chv
* WaProgramMiArbOnOffAroundMiSetContext:chv

v2: Drop WaDisableSemaphoreAndSyncFlipWait note

Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 30b355a..37dc36d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -614,7 +614,7 @@ mi_set_context(struct intel_ring_buffer *ring,
if (ret)
return ret;
 
-   /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw */
+   /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
if (INTEL_INFO(ring-dev)-gen = 7)
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
else
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eb3dd26..b025a51 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -599,7 +599,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
 * programmed to '1' on all products.
 *
-* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw
+* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
 */
if (INTEL_INFO(dev)-gen = 6)
I915_WRITE(MI_MODE, 
_MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 05.1/24] drm/i915: Make sure computed watermarks never overflow the registers

2014-04-28 Thread ville . syrjala
From: Ville Syrjälä ville.syrj...@linux.intel.com

When we calculate the watermarks for a pipe make sure we leave any
level fully zeroed out if it would exceed any of the maximum values
that fit in the registers.

This will be important later when we start to use also disabled
watermark levels during LP1+ merging.

Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/intel_pm.c | 43 ++---
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f061ef1..c722acb 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -1921,6 +1921,16 @@ static void ilk_compute_wm_maximums(const struct 
drm_device *dev,
max-fbc = ilk_fbc_wm_reg_max(dev);
 }
 
+static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
+   int level,
+   struct ilk_wm_maximums *max)
+{
+   max-pri = ilk_plane_wm_reg_max(dev, level, false);
+   max-spr = ilk_plane_wm_reg_max(dev, level, true);
+   max-cur = ilk_cursor_wm_reg_max(dev, level);
+   max-fbc = ilk_fbc_wm_reg_max(dev);
+}
+
 static bool ilk_validate_wm_level(int level,
  const struct ilk_wm_maximums *max,
  struct intel_wm_level *result)
@@ -2178,9 +2188,6 @@ static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
};
struct ilk_wm_maximums max;
 
-   /* LP0 watermarks always use 1/2 DDB partitioning */
-   ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, max);
-
pipe_wm-pipe_enabled = params-active;
pipe_wm-sprites_enabled = params-spr.enabled;
pipe_wm-sprites_scaled = params-spr.scaled;
@@ -2193,15 +2200,37 @@ static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
if (params-spr.scaled)
max_level = 0;
 
-   for (level = 0; level = max_level; level++)
-   ilk_compute_wm_level(dev_priv, level, params,
-pipe_wm-wm[level]);
+   ilk_compute_wm_level(dev_priv, 0, params, pipe_wm-wm[0]);
 
if (IS_HASWELL(dev) || IS_BROADWELL(dev))
pipe_wm-linetime = hsw_compute_linetime_wm(dev, crtc);
 
+   /* LP0 watermarks always use 1/2 DDB partitioning */
+   ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, max);
+
/* At least LP0 must be valid */
-   return ilk_validate_wm_level(0, max, pipe_wm-wm[0]);
+   if (!ilk_validate_wm_level(0, max, pipe_wm-wm[0]))
+   return false;
+
+   ilk_compute_wm_reg_maximums(dev, 1, max);
+
+   for (level = 1; level = max_level; level++) {
+   struct intel_wm_level wm = {};
+
+   ilk_compute_wm_level(dev_priv, level, params, wm);
+
+   /*
+* Disable any watermark level that exceeds the
+* register maximums since such watermarks are
+* always invalid.
+*/
+   if (!ilk_validate_wm_level(level, max, wm))
+   break;
+
+   pipe_wm-wm[level] = wm;
+   }
+
+   return true;
 }
 
 /*
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 05.2/24] drm/i915: Merge LP1+ watermarks in safer way

2014-04-28 Thread ville . syrjala
From: Ville Syrjälä ville.syrj...@linux.intel.com

On ILK when we disable a particular watermark level, we must
maintain the actual watermark values for that level for some time
(until the next vblank possibly). Otherwise we risk underruns.

In order to achieve that result we must merge the LP1+ watermarks a
bit differently since we must also merge levels that are to be
disabled. We must also make sure we don't overflow the fields in the
watermark registers in case the calculated watermarks come out too
big to fit.

As early as possible we mark all computed watermark levels as
disabled if they would exceed the register maximums. We make sure
to leave the actual watermarks for such levels zeroed out. Then during
merging, we take the maximum values for every level, regardless of
whether they're disabled or not. That may seem a bit pointless since at the
moment all the watermark levels we merge should have their values
zeroed if the level is already disabled. However soon we will be
dealing with intermediate watermarks that, in addition to the new
watermark values, also contain the previous watermark values, and so
levels that are disabled may no longer be zeroed out.

v2: Split the patch in two (Paulo)
Use if() instead of  when merging -enable (Paulo)

Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/intel_pm.c | 37 -
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c722acb..b89fc33 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2242,6 +2242,8 @@ static void ilk_merge_wm_level(struct drm_device *dev,
 {
const struct intel_crtc *intel_crtc;
 
+   ret_wm-enable = true;
+
list_for_each_entry(intel_crtc, dev-mode_config.crtc_list, base.head) 
{
const struct intel_pipe_wm *active = intel_crtc-wm.active;
const struct intel_wm_level *wm = active-wm[level];
@@ -2249,16 +2251,19 @@ static void ilk_merge_wm_level(struct drm_device *dev,
if (!active-pipe_enabled)
continue;
 
+   /*
+* The watermark values may have been used in the past,
+* so we must maintain them in the registers for some
+* time even if the level is now disabled.
+*/
if (!wm-enable)
-   return;
+   ret_wm-enable = false;
 
ret_wm-pri_val = max(ret_wm-pri_val, wm-pri_val);
ret_wm-spr_val = max(ret_wm-spr_val, wm-spr_val);
ret_wm-cur_val = max(ret_wm-cur_val, wm-cur_val);
ret_wm-fbc_val = max(ret_wm-fbc_val, wm-fbc_val);
}
-
-   ret_wm-enable = true;
 }
 
 /*
@@ -2270,6 +2275,7 @@ static void ilk_wm_merge(struct drm_device *dev,
 struct intel_pipe_wm *merged)
 {
int level, max_level = ilk_wm_max_level(dev);
+   int last_enabled_level = max_level;
 
/* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
if ((INTEL_INFO(dev)-gen = 6 || IS_IVYBRIDGE(dev)) 
@@ -2285,15 +2291,19 @@ static void ilk_wm_merge(struct drm_device *dev,
 
ilk_merge_wm_level(dev, level, wm);
 
-   if (!ilk_validate_wm_level(level, max, wm))
-   break;
+   if (level  last_enabled_level)
+   wm-enable = false;
+   else if (!ilk_validate_wm_level(level, max, wm))
+   /* make sure all following levels get disabled */
+   last_enabled_level = level - 1;
 
/*
 * The spec says it is preferred to disable
 * FBC WMs instead of disabling a WM level.
 */
if (wm-fbc_val  max-fbc) {
-   merged-fbc_wm_enabled = false;
+   if (wm-enable)
+   merged-fbc_wm_enabled = false;
wm-fbc_val = 0;
}
}
@@ -2348,14 +2358,19 @@ static void ilk_compute_wm_results(struct drm_device 
*dev,
level = ilk_wm_lp_to_level(wm_lp, merged);
 
r = merged-wm[level];
-   if (!r-enable)
-   break;
 
-   results-wm_lp[wm_lp - 1] = WM3_LP_EN |
+   /*
+* Maintain the watermark values even if the level is
+* disabled. Doing otherwise could cause underruns.
+*/
+   results-wm_lp[wm_lp - 1] =
(ilk_wm_lp_latency(dev, level)  WM1_LP_LATENCY_SHIFT) 
|
(r-pri_val  WM1_LP_SR_SHIFT) |
r-cur_val;
 
+   if (r-enable)
+   results-wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
+
if (INTEL_INFO(dev)-gen = 8)

[Intel-gfx] [PATCH v2 09/24] drm/i915: Keep vblank interrupts enabled while enabling/disabling planes

2014-04-28 Thread ville . syrjala
From: Ville Syrjälä ville.syrj...@linux.intel.com

Because of the upcoming vblank interrupt driven watermark update
mechanism we will have use for vblank interrupts during plane
enabling/disabling. So don't call drm_vblank_off() until planes
are off, and call drm_vblank_on() just before we start to enable
the planes.

v2: Pimp commit message (Paulo)

Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/intel_display.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 88df4ea..8d2a31e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3547,6 +3547,8 @@ static void ilk_crtc_enable_planes(struct drm_crtc *crtc)
int pipe = intel_crtc-pipe;
int plane = intel_crtc-plane;
 
+   drm_vblank_on(dev, pipe);
+
intel_enable_primary_plane(dev_priv, plane, pipe);
intel_enable_planes(crtc);
intel_crtc_update_cursor(crtc, true);
@@ -3557,8 +3559,6 @@ static void ilk_crtc_enable_planes(struct drm_crtc *crtc)
mutex_lock(dev-struct_mutex);
intel_update_fbc(dev);
mutex_unlock(dev-struct_mutex);
-
-   drm_vblank_on(dev, pipe);
 }
 
 static void ilk_crtc_disable_planes(struct drm_crtc *crtc)
@@ -3570,7 +3570,6 @@ static void ilk_crtc_disable_planes(struct drm_crtc *crtc)
int plane = intel_crtc-plane;
 
intel_crtc_wait_for_pending_flips(crtc);
-   drm_vblank_off(dev, pipe);
 
if (dev_priv-fbc.plane == plane)
intel_disable_fbc(dev);
@@ -3581,6 +3580,8 @@ static void ilk_crtc_disable_planes(struct drm_crtc *crtc)
intel_disable_planes(crtc);
intel_disable_primary_plane(dev_priv, plane, pipe);
intel_wait_for_vblank(dev, pipe);
+
+   drm_vblank_off(dev, pipe);
 }
 
 static void ironlake_crtc_enable(struct drm_crtc *crtc)
-- 
1.8.3.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PULL] drm-intel-next

2014-04-28 Thread Daniel Vetter
Hi Dave,

drm-intel-next-2014-04-16:
- vlv infoframe fixes from Jesse
- dsi/mipi fixes from Shobhit
- gen8 pageflip fixes for LRI/SRM from Damien
- cmd parser fixes from Brad Volkin
- some prep patches for CHV, DRRS, ...
- and tons of little things all over
drm-intel-next-2014-04-04:
- cmd parser for gen7 but only in enforcing and not yet granting mode - the
  batch copying stuff is still missing. Also performance is a bit ... rough
  (Brad Volkin + OACONTROL fix from Ken).
- deprecate UMS harder (i.e. CONFIG_BROKEN)
- interrupt rework from Paulo Zanoni
- runtime PM support for bdw and snb, again from Paulo
- a pile of refactorings from various people all over the place to prep for new
  stuff (irq reworks, power domain polish, ...)

drm-intel-next-2014-04-04:
- cmd parser for gen7 but only in enforcing and not yet granting mode - the
  batch copying stuff is still missing. Also performance is a bit ... rough
  (Brad Volkin + OACONTROL fix from Ken).
- deprecate UMS harder (i.e. CONFIG_BROKEN)
- interrupt rework from Paulo Zanoni
- runtime PM support for bdw and snb, again from Paulo
- a pile of refactorings from various people all over the place to prep for new
  stuff (irq reworks, power domain polish, ...)

As discussed on irc this contains a (not yet fully tuned and also not yet
in granting mode) cmd parser for gen7. Performance is still a bit rough,
but not quite as bad as originally feared (Ken later on discovered that he
also changed something in his glamour setup which made things worse). If
it doesn't get better (and ofc if we don't get all the missing bits in for
granting mode) I'll disable it before 3.16 again. But I want to give this
beast as much testing as possible for now to avoid ugly regressions once
we switch it on.

Also please don't use the autogenerated merge commit since that'll miss the
stuff from the 1st drm-intel-next tag.

If I read the merges in -nightly correctly there's a bit of a conflict in
i915_gem_context.c. I can provide an example merge if you want (or
otherwise just peek at linux-next or drm-intel-nightly).

Cheers, Daniel


The following changes since commit c39b06951f1dc2e384650288676c5b7dcc0ec92c:

  DRM: armada: fix corruption while loading cursors (2014-04-08 10:51:03 +1000)

are available in the git repository at:

  git://anongit.freedesktop.org/drm-intel tags/drm-intel-next-2014-04-16

for you to fetch changes up to c79057922ed6c2c6df1214e6ab4414fea1b23db2:

  drm/i915: Remove vblank wait from haswell_write_eld (2014-04-16 18:52:47 
+0200)


- vlv infoframe fixes from Jesse
- dsi/mipi fixes from Shobhit
- gen8 pageflip fixes for LRI/SRM from Damien
- cmd parser fixes from Brad Volkin
- some prep patches for CHV, DRRS, ...
- and tons of little things all over


Akash Goel (2):
  drm/i915: Enabling the TLB invalidate bit in GFX Mode register
  drm/i915/vlv:Implement the WA 'WaDisable_RenderCache_OperationalFlush'

Ben Widawsky (10):
  drm/i915: Split out GTT specific header file
  drm/i915: Allow full PPGTT with param override
  drm/i915/bdw: Set initial rps freq to RP1
  drm/i915/bdw: Extract rp_state_caps logic
  drm/i915/bdw: RPS frequency bits are the same as HSW
  drm/i915/bdw: Expand FADD to 64bit
  drm/i915: Invariably invalidate before ctx switch
  drm/i915: Unref context on failed eb_create
  drm/i915: Dump the whole context object.
  drm/i915/bdw: Add 42ms delay for IPS disable

Brad Volkin (15):
  drm/i915: Initial command parser table definitions
  drm/i915: Reject privileged commands
  drm/i915: Allow some privileged commands from master
  drm/i915: Add register whitelists for mesa
  drm/i915: Add register whitelist for DRM master
  drm/i915: Enable register whitelist checks
  drm/i915: Reject commands that explicitly generate interrupts
  drm/i915: Enable PPGTT command parser checks
  drm/i915: Reject commands that would store to global HWS page
  drm/i915: Add a CMD_PARSER_VERSION getparam
  drm/i915: Enable command parsing by default
  drm/i915: BUG_ON() when cmd/reg tables are not sorted
  drm/i915: Refactor cmd parser checks into a function
  drm/i915: Track OACONTROL register enable/disable during parsing
  drm/i915: Add more registers to the whitelist for mesa

Chris Wilson (4):
  drm/i915: Rename GFX_TLB_INVALIDATE_ALWAYS
  drm/i915: Add PM interrupt details and RPS thresholds to debugfs
  drm/i915: Move all ring resets before setting the HWS page
  drm/i915: dma_buf_vunmap is presumed not to fail, don't let it

Christoph Jaeger (1):
  drm/i915: drop __FUNCTION__ as argument to DRM_DEBUG_KMS

Damien Lespiau (10):
  drm/i915: Don't store the max cursor width/height in the crtc
  drm/i915: Hide vlv_force_wake_{get, put}() in intel_uncore.c
  drm/i915: Hide the per forcewake-engine 

Re: [Intel-gfx] [RESEND][PATCH][linux-next] Revert drm/i915: fix build warning on 32-bit (v2)

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 03:03:23PM +0200, Jan Moskyto Matejka wrote:
 This reverts commit 60f2b4af1258c05e6b037af866be81abc24438f7.
 
 The same warning has been fixed in e5081a538a565284fec5f30a937d98e460d5e780 
 and
 these two commits got merged in 74e99a84de2d0980320612db8015ba606af42114 which
 caused another warning. Simply, the reverted commit cast the pointer
 difference to unsigned long and the other commit changed the output type from
 long to ptrdiff_t.
 
 The other commit fixes the original warning the better way so I'm reverting
 this commit now.
 
 Signed-off-by: Jan Moskyto Matejka m...@suse.cz

My apologies for missing this the first time around. Queued for -next,
thanks for the patch.
-Daniel
 ---
  drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
 b/drivers/gpu/drm/i915/i915_cmd_parser.c
 index 4cf6d02..0eaed44 100644
 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
 +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
 @@ -405,7 +405,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
   DRM_DEBUG_DRIVER(CMD: Command length exceeds batch 
 length: 0x%08X length=%d batchlen=%td\n,
*cmd,
length,
 -  (unsigned long)(batch_end - cmd));
 +  batch_end - cmd);
   ret = -EINVAL;
   break;
   }
 -- 
 1.8.4.5

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fix assert_plane warning during FDI link train

2014-04-28 Thread Daniel Vetter
On Fri, Apr 25, 2014 at 10:12:07PM +0300, ville.syrj...@linux.intel.com wrote:
 From: Ville Syrjälä ville.syrj...@linux.intel.com
 
 assert_plane_enabled() is now triggering during FDI link train because
 we no longer enable planes that early.
 
 This problem got introduced in:
  commit a5c4d7bc187bd13bc11ac06bb4ea3a0d4001aa4d
  Author: Ville Syrjälä ville.syrj...@linux.intel.com
  Date:   Fri Mar 7 18:32:13 2014 +0200
 
 drm/i915: Disable/enable planes as the first/last thing during modeset on 
 ILK+
 
 Just drop the assert since we shouldn't need planes for link training.
 
 Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com

Queued for -next, thanks for the patch. I've spotted this in a QA report,
but afaik they didn't yet file a bug report.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: bdw: fix RC6 enabled status reporting and disable runtime PM

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 12:03:59PM +0300, Imre Deak wrote:
 On BDW we don't enable RC6 at the moment, but this isn't reflected in
 the (sanitized) i915.enable_rc6 option. So make enable_rc6 report
 correctly that RC6 is disabled, which will also effectively disable RPM
 on BDW (since RPM depends on RC6).
 
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77565
 
 Signed-off-by: Imre Deak imre.d...@intel.com

Officially we've merged runtime PM, but if it doesn't work we need to
temporarily disable it. Queued for -next, thanks for the patch.
-Daniel

 ---
  drivers/gpu/drm/i915/intel_pm.c | 4 
  1 file changed, 4 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
 index d49ec02..19020e5 100644
 --- a/drivers/gpu/drm/i915/intel_pm.c
 +++ b/drivers/gpu/drm/i915/intel_pm.c
 @@ -3260,6 +3260,10 @@ static int sanitize_rc6_option(const struct drm_device 
 *dev, int enable_rc6)
   if (INTEL_INFO(dev)-gen == 5  !IS_IRONLAKE_M(dev))
   return 0;
  
 + /* Disable RC6 on Broadwell for now */
 + if (IS_BROADWELL(dev))
 + return 0;
 +
   /* Respect the kernel parameter if it is set */
   if (enable_rc6 = 0) {
   int mask;
 -- 
 1.8.4
 
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: restore backlight precision when converting from opregion

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 11:19:29AM +0800, Aaron Lu wrote:
 When we set backlight on behalf of ACPI opregion, we will convert the
 backlight value in the 0-255 range defined in opregion to the actual
 hardware level. Commit 22505b82a2 (drm/i915: avoid brightness overflow
 when doing scale) is meant to fix the overflow problem when doing the
 conversion, but it also caused a problem that the converted hardware
 level doesn't quite represent the intended value: say user wants maximum
 backlight level(255 in opregion's range), then we will calculate the
 actual hardware level to be: level = freq / max * level, where freq is
 the hardware's max backlight level(937 on a user's box), and max and
 level are all 255. The converted value should be 937 but the above
 calculation will yield 765.
 
 To fix this issue, just use 64 bits to do the calculation to keep the
 precision and avoid overflow at the same time.
 
 Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=72491
 Reported-and-tested-by: Nico Schottelius 
 nico-bugzilla.kernel@schottelius.org
 Signed-off-by: Aaron Lu aaron...@intel.com
 ---
  drivers/gpu/drm/i915/intel_panel.c | 5 +
  1 file changed, 1 insertion(+), 4 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/intel_panel.c 
 b/drivers/gpu/drm/i915/intel_panel.c
 index a953b081ee38..bdd2f24b7a6b 100644
 --- a/drivers/gpu/drm/i915/intel_panel.c
 +++ b/drivers/gpu/drm/i915/intel_panel.c
 @@ -502,10 +502,7 @@ void intel_panel_set_backlight(struct intel_connector 
 *connector, u32 level,
  
   /* scale to hardware max, but be careful to not overflow */
   freq = panel-backlight.max;
 - if (freq  max)
 - level = level * freq / max;
 - else
 - level = freq / max * level;
 + level = (u64)level * freq / max;

64bit divisions won't compile on 32bit. You need one of the DO_DIV macros,
or whatever they're called again. A pain, I know ;-)
-Daniel

  
   panel-backlight.level = level;
   if (panel-backlight.device)
 -- 
 1.9.0
 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH for stable 3.14 only 1/1] drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 01:10:07PM +0300, Jani Nikula wrote:
 This reverts the bisected regressing
 
 commit bc0bb9fd1c7810407ab810d204bbaecb255fddde
 Author: Jani Nikula jani.nik...@intel.com
 Date:   Thu Nov 14 12:14:29 2013 +0200
 
 drm/i915: remove QUIRK_NO_PCH_PWM_ENABLE
 
 restoring QUIRK_NO_PCH_PWM_ENABLE for a couple of Dell XPS models which
 broke in 3.14.
 
 There is no such revert upstream. We have root caused and fixed the
 issue upstream, without the quirk, with:
 
 commit 39fbc9c8f6765959b55e0b127dd5c57df5a47d67
 Author: Jani Nikula jani.nik...@intel.com
 Date:   Wed Apr 9 11:22:06 2014 +0300
 
 drm/i915: check VBT for supported backlight type
 
 and
 
 commit c675949ec58ca50d5a3ae3c757892f1560f6e896
 Author: Jani Nikula jani.nik...@intel.com
 Date:   Wed Apr 9 11:31:37 2014 +0300
 
 drm/i915: do not setup backlight if not available according to VBT
 
 While the commits are within the stable rules otherwise, and fix more
 machines than just the regressed Dell XPS models, we feel backporting
 them to stable may be too risky. The revert is limited to the broken
 machines, and the impact should be effectively the same as what the
 upstream commits do more generally.
 
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76276
 Reported-by: Romain Francoise rom...@orebokech.com
 CC: Kamal Mostafa ka...@canonical.com
 CC: Daniel Vetter dan...@ffwll.ch
 CC: sta...@vger.kernel.org (3.14 only)
 Signed-off-by: Jani Nikula jani.nik...@intel.com

Ack from my side since the VBT-based fix we have in 3.15 really is a bit
too risky for backporting and should get the full -rc cycle for testing.
But we just can't let existing users on affected hw hang in there for 2
months.
-Daniel

 ---
  drivers/gpu/drm/i915/i915_drv.h  |  1 +
  drivers/gpu/drm/i915/intel_display.c | 16 
  drivers/gpu/drm/i915/intel_panel.c   |  4 
  3 files changed, 21 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index df77e20e3c3d..697f2150a997 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -734,6 +734,7 @@ enum intel_sbi_destination {
  #define QUIRK_PIPEA_FORCE (10)
  #define QUIRK_LVDS_SSC_DISABLE (11)
  #define QUIRK_INVERT_BRIGHTNESS (12)
 +#define QUIRK_NO_PCH_PWM_ENABLE (13)
  
  struct intel_fbdev;
  struct intel_fbc_work;
 diff --git a/drivers/gpu/drm/i915/intel_display.c 
 b/drivers/gpu/drm/i915/intel_display.c
 index 9b8a7c7ea7fc..963639d9049b 100644
 --- a/drivers/gpu/drm/i915/intel_display.c
 +++ b/drivers/gpu/drm/i915/intel_display.c
 @@ -10771,6 +10771,17 @@ static void quirk_invert_brightness(struct 
 drm_device *dev)
   DRM_INFO(applying inverted panel brightness quirk\n);
  }
  
 +/*
 + * Some machines (Dell XPS13) suffer broken backlight controls if
 + * BLM_PCH_PWM_ENABLE is set.
 + */
 +static void quirk_no_pcm_pwm_enable(struct drm_device *dev)
 +{
 + struct drm_i915_private *dev_priv = dev-dev_private;
 + dev_priv-quirks |= QUIRK_NO_PCH_PWM_ENABLE;
 + DRM_INFO(applying no-PCH_PWM_ENABLE quirk\n);
 +}
 +
  struct intel_quirk {
   int device;
   int subsystem_vendor;
 @@ -10839,6 +10850,11 @@ static struct intel_quirk intel_quirks[] = {
  
   /* Acer Aspire 4736Z */
   { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness },
 +
 + /* Dell XPS13 HD Sandy Bridge */
 + { 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable },
 + /* Dell XPS13 HD and XPS13 FHD Ivy Bridge */
 + { 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable },
  };
  
  static void intel_init_quirks(struct drm_device *dev)
 diff --git a/drivers/gpu/drm/i915/intel_panel.c 
 b/drivers/gpu/drm/i915/intel_panel.c
 index 079ea38f14d9..9f1d7a9300e8 100644
 --- a/drivers/gpu/drm/i915/intel_panel.c
 +++ b/drivers/gpu/drm/i915/intel_panel.c
 @@ -671,6 +671,10 @@ static void pch_enable_backlight(struct intel_connector 
 *connector)
   pch_ctl2 = panel-backlight.max  16;
   I915_WRITE(BLC_PWM_PCH_CTL2, pch_ctl2);
  
 + /* XXX: transitional */
 + if (dev_priv-quirks  QUIRK_NO_PCH_PWM_ENABLE)
 + return;
 +
   pch_ctl1 = 0;
   if (panel-backlight.active_low_pwm)
   pch_ctl1 |= BLM_PCH_POLARITY;
 -- 
 1.9.1
 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cheeryview

2014-04-28 Thread Imre Deak
On Mon, 2014-04-21 at 13:34 +0530, deepa...@linux.intel.com wrote:
 From: Deepak S deepa...@linux.intel.com
 
 v2: Configure PCBR if BIOS fails to allocate PCBR (deepak)
 
 v3: Fix PCBR condition check during CHV RC6 Enable flag set
 
 Signed-off-by: Deepak S deepa...@linux.intel.com
 ---
  drivers/gpu/drm/i915/i915_reg.h |   1 +
  drivers/gpu/drm/i915/intel_pm.c | 100 
 +++-
  2 files changed, 99 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
 index b951d61..7090b42 100644
 --- a/drivers/gpu/drm/i915/i915_reg.h
 +++ b/drivers/gpu/drm/i915/i915_reg.h
 @@ -5134,6 +5134,7 @@ enum punit_power_well {
  #define GEN6_GT_GFX_RC6  0x138108
  #define GEN6_GT_GFX_RC6p 0x13810C
  #define GEN6_GT_GFX_RC6pp0x138110
 +#define VLV_PCBR_ADDR_SHIFT  12
  
  #define GEN6_PCODE_MAILBOX   0x138124
  #define   GEN6_PCODE_READY   (131)
 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
 index f3c5bce..421a4cc 100644
 --- a/drivers/gpu/drm/i915/intel_pm.c
 +++ b/drivers/gpu/drm/i915/intel_pm.c
 @@ -3264,6 +3264,18 @@ static void gen6_disable_rps(struct drm_device *dev)
   gen6_disable_rps_interrupts(dev);
  }
  
 +static void cherryview_disable_rps(struct drm_device *dev)
 +{
 + struct drm_i915_private *dev_priv = dev-dev_private;
 +
 + I915_WRITE(GEN6_RC_CONTROL, 0);
 +
 + if (dev_priv-vlv_pctx) {
 + drm_gem_object_unreference(dev_priv-vlv_pctx-base);
 + dev_priv-vlv_pctx = NULL;
 + }
 +}
 +
  static void valleyview_disable_rps(struct drm_device *dev)
  {
   struct drm_i915_private *dev_priv = dev-dev_private;
 @@ -3642,6 +3654,28 @@ static void valleyview_check_pctx(struct 
 drm_i915_private *dev_priv)
dev_priv-vlv_pctx-stolen-start);
  }
  
 +static void cherryview_setup_pctx(struct drm_device *dev)
 +{
 + struct drm_i915_private *dev_priv = dev-dev_private;
 + unsigned long pctx_paddr;
 + struct i915_gtt *gtt = dev_priv-gtt;
 + u32 pcbr;
 + int pctx_size = 32*1024;
 +
 + pcbr = I915_READ(VLV_PCBR);
 + if ((pcbr  VLV_PCBR_ADDR_SHIFT) == 0) {
 + /*
 +  * From the Gunit register HAS:
 +  * The Gfx driver is expected to program this register and 
 ensure
 +  * proper allocation within Gfx stolen memory.  For example, 
 this
 +  * register should be programmed such than the PCBR range does 
 not
 +  * overlap with other relevant ranges.
 +  */
 + pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - 
 pctx_size);

This area should be reserved.

 + I915_WRITE(VLV_PCBR, pctx_paddr);
 + }
 +}
 +
  static void valleyview_setup_pctx(struct drm_device *dev)
  {
   struct drm_i915_private *dev_priv = dev-dev_private;
 @@ -3697,6 +3731,61 @@ static void valleyview_cleanup_pctx(struct drm_device 
 *dev)
   dev_priv-vlv_pctx = NULL;
  }
  
 +static void cherryview_enable_rps(struct drm_device *dev)
 +{
 + struct drm_i915_private *dev_priv = dev-dev_private;
 + struct intel_ring_buffer *ring;
 + u32 gtfifodbg, rc6_mode = 0, pcbr;
 + int i;
 +
 + WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock));
 +
 + if ((gtfifodbg = I915_READ(GTFIFODBG))) {
 + DRM_DEBUG_DRIVER(GT fifo had a previous error %x\n,
 +  gtfifodbg);
 + I915_WRITE(GTFIFODBG, gtfifodbg);
 + }
 +
 + cherryview_setup_pctx(dev);

This should be called from intel_init_gt_powersave().

 +
 + /* 1a  1b: Get forcewake during program sequence. Although the driver
 +  * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
 + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 +
 + /* 2a: Program RC6 thresholds.*/
 + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40  16);
 + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
 + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
 +
 + for_each_ring(ring, dev_priv, i)
 + I915_WRITE(RING_MAX_IDLE(ring-mmio_base), 10);
 +
 + I915_WRITE(GEN6_RC6_THRESHOLD, 5); /* 50/125ms per EI */
 +
 + /* allows RC6 residency counter to work */
 + I915_WRITE(VLV_COUNTER_CONTROL,
 +_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
 +   VLV_MEDIA_RC6_COUNT_EN |
 +   VLV_RENDER_RC6_COUNT_EN));
 +
 + /* Todo: If BIOS has not configured PCBR
 +  *   then allocate in BIOS Reserved */
 +
 + /* For now we assume BIOS is allocating and populating the PCBR  */
 + pcbr = I915_READ(VLV_PCBR);
 +
 + DRM_DEBUG_DRIVER(PCBR offset : 0x%x\n, pcbr);
 +
 + /* 3: Enable RC6 */
 + if ((intel_enable_rc6(dev)  INTEL_RC6_ENABLE) 
 +  

Re: [Intel-gfx] [PATCH 18/71] drm/i915/chv: Add vlv_pipe_to_channel

2014-04-28 Thread Imre Deak
On Wed, 2014-04-09 at 13:28 +0300, ville.syrj...@linux.intel.com wrote:
 From: Chon Ming Lee chon.ming@intel.com
 
 Cherryview has 3 pipes.  Some of the pll dpio offset calculation is
 based on pipe number.  Need to use vlv_pipe_to_channel to calculate the
 correct phy channel to use for the pipe.
 
 Signed-off-by: Chon Ming Lee chon.ming@intel.com

Reviewed-by: Imre Deak imre.d...@intel.com

 ---
  drivers/gpu/drm/i915/intel_drv.h | 14 ++
  1 file changed, 14 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/intel_drv.h 
 b/drivers/gpu/drm/i915/intel_drv.h
 index 087e471..e572799 100644
 --- a/drivers/gpu/drm/i915/intel_drv.h
 +++ b/drivers/gpu/drm/i915/intel_drv.h
 @@ -544,6 +544,20 @@ vlv_dport_to_channel(struct intel_digital_port *dport)
   }
  }
  
 +static inline int
 +vlv_pipe_to_channel(enum pipe pipe)
 +{
 + switch (pipe) {
 + case PIPE_A:
 + case PIPE_C:
 + return DPIO_CH0;
 + case PIPE_B:
 + return DPIO_CH1;
 + default:
 + BUG();
 + }
 +}
 +
  static inline struct drm_crtc *
  intel_get_crtc_for_pipe(struct drm_device *dev, int pipe)
  {



signature.asc
Description: This is a digitally signed message part
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 43/49] drm/i915/bdw: Handle context switch events

2014-04-28 Thread Mateo Lozano, Oscar
 tmp = I915_READ(GEN8_GT_IIR(0));
 if (tmp) {
 ret = IRQ_HANDLED;
   +
 rcs = tmp  GEN8_RCS_IRQ_SHIFT;
   - bcs = tmp  GEN8_BCS_IRQ_SHIFT;
   + ring = dev_priv-ring[RCS];
 if (rcs  GT_RENDER_USER_INTERRUPT)
   - notify_ring(dev, dev_priv-ring[RCS]);
   + notify_ring(dev, ring);
   + if (rcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
   + gen8_handle_context_events(ring);
  
  Handling the context events here can generate a new execlist submission,
  which if a small enough workload, can finish and generate a new context 
  event
  interrupt before we ack this interrupt.
  
  When we ack this interrupt, we clear the new one too, losing an interrupt.
  
  Moving the
  
  I915_WRITE(GEN8_GT_IIR(0), tmp);
  
  to just inside the if (tmp) { conditional (or anywhere before this call) 
  fixes this
  issue. There is no harm in acking the interrupt immediately as we have the
  read stored in tmp.
  
 -Original Message-
 From: Daniel, Thomas
 Sent: Monday, April 28, 2014 10:58 AM
 To: Beckett, Robert; Mateo Lozano, Oscar; Barbalho, Rafael; Ewins, Jon
 Subject: RE: Re: [Intel-gfx] [PATCH 43/49] drm/i915/bdw: Handle context switch
 events
 
 Hi Bob,
 
 Looks like a good catch, and a sensible fix.
 
 Thomas.

I agree with Thomas. Will add to the next revision of the series.

Thanks!
Oscar
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cheeryview

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 05:29:46PM +0300, Imre Deak wrote:
  +static void cherryview_setup_pctx(struct drm_device *dev)
  +{
  +   struct drm_i915_private *dev_priv = dev-dev_private;
  +   unsigned long pctx_paddr;
  +   struct i915_gtt *gtt = dev_priv-gtt;
  +   u32 pcbr;
  +   int pctx_size = 32*1024;
  +
  +   pcbr = I915_READ(VLV_PCBR);
  +   if ((pcbr  VLV_PCBR_ADDR_SHIFT) == 0) {
  +   /*
  +* From the Gunit register HAS:
  +* The Gfx driver is expected to program this register and 
  ensure
  +* proper allocation within Gfx stolen memory.  For example, 
  this
  +* register should be programmed such than the PCBR range does 
  not
  +* overlap with other relevant ranges.
  +*/
  +   pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - 
  pctx_size);
 
 This area should be reserved.

We've had a really lengthy discussion internally about the bios-reserved
chunk in stolen. It was stalled due to (imo unjustified) fear of leaking
information about what the BIOS actually uses this for.

If we need to reserve more of stolen than we currently do we need to pick
up that approach again instead of adding more bandaids.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3] drm/i915: Add boot paramter to control rps boost at boot time.

2014-04-28 Thread deepak . s
From: Deepak S deepa...@linux.intel.com

We are adding a module parameter to control rps boost. By default, we
enable the boost for better performance. Based on the need (perf/power)
we can either enable or disable it.

v2: Addressed rps default comment (Jani)

v3: Use bool to represent the boot parameter (Ville).

Signed-off-by: Deepak S deepa...@linux.intel.com
Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.h| 1 +
 drivers/gpu/drm/i915/i915_gem.c| 2 +-
 drivers/gpu/drm/i915/i915_params.c | 5 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e81feab..6136aab 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1945,6 +1945,7 @@ struct i915_params {
bool reset;
bool disable_display;
bool disable_vtd_wa;
+   bool enable_rps_boost;
 };
 extern struct i915_params i915 __read_mostly;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b00a77e..f2b3262 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1049,7 +1049,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, 
u32 seqno,
 
timeout_expire = timeout ? jiffies + 
timespec_to_jiffies_timeout(timeout) : 0;
 
-   if (INTEL_INFO(dev)-gen = 6  can_wait_boost(file_priv)) {
+   if (INTEL_INFO(dev)-gen = 6  can_wait_boost(file_priv)  
i915.enable_rps_boost) {
gen6_rps_boost(dev_priv);
if (file_priv)
mod_delayed_work(dev_priv-wq,
diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index d05a2af..b51da7c 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -48,6 +48,7 @@ struct i915_params i915 __read_mostly = {
.disable_display = 0,
.enable_cmd_parser = 1,
.disable_vtd_wa = 0,
+   .enable_rps_boost = true,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -156,3 +157,7 @@ MODULE_PARM_DESC(disable_vtd_wa, Disable all VT-d 
workarounds (default: false)
 module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
 Enable command parsing (1=enabled [default], 0=disabled));
+
+module_param_named(enable_rps_boost, i915.enable_rps_boost, bool, 0600);
+MODULE_PARM_DESC(enable_rps_boost,
+Enable/Disable boost RPS frequency (default: true));
-- 
1.8.5.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 66/71] drm/i915/chv: Use RMW to toggle swing calc init

2014-04-28 Thread Mika Kuoppala
ville.syrj...@linux.intel.com writes:

 From: Ville Syrjälä ville.syrj...@linux.intel.com

 The spec only tells us to set individual bits here and there. So we use
 RMW for most things. Do the same for the swing calc init.

 Eventually we should optimize things to just blast the final value in
 with group access whenever possible. But to do that someone needs to
 take a good look at what's the reset value for each registers, and
 possibly if the BIOS manages to frob with some of them. For now
 use RMW access always.

 Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com

Some accesses use defined masks, some hardcoded ones.
But as they were there to begin with, that is not a problem of
these patches.

For future work, I think we could get rid of
quite a number of DWXX_CHXX definitions if we
would build macros that set up the function, lane/group
and broadcast.

Patches 65 and 66,
Reviewed-by: Mika Kuoppala mika.kuopp...@intel.com

 ---
  drivers/gpu/drm/i915/i915_reg.h   |  7 +++
  drivers/gpu/drm/i915/intel_dp.c   | 17 ++---
  drivers/gpu/drm/i915/intel_hdmi.c | 18 ++
  3 files changed, 35 insertions(+), 7 deletions(-)

 diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
 index b91232f..7056994 100644
 --- a/drivers/gpu/drm/i915/i915_reg.h
 +++ b/drivers/gpu/drm/i915/i915_reg.h
 @@ -698,6 +698,13 @@ enum punit_power_well {
  #define   DPIO_PCS_SWING_CALC_TX1_TX3(131)
  #define CHV_PCS_DW10(ch) _PORT(ch, _CHV_PCS_DW10_CH0, _CHV_PCS_DW10_CH1)
  
 +#define _VLV_PCS01_DW10_CH0  0x0228
 +#define _VLV_PCS23_DW10_CH0  0x0428
 +#define _VLV_PCS01_DW10_CH1  0x2628
 +#define _VLV_PCS23_DW10_CH1  0x2828
 +#define VLV_PCS01_DW10(port) _PORT(port, _VLV_PCS01_DW10_CH0, 
 _VLV_PCS01_DW10_CH1)
 +#define VLV_PCS23_DW10(port) _PORT(port, _VLV_PCS23_DW10_CH0, 
 _VLV_PCS23_DW10_CH1)
 +
  #define _VLV_PCS_DW11_CH00x822c
  #define _VLV_PCS_DW11_CH10x842c
  #define VLV_PCS_DW11(ch) _PORT(ch, _VLV_PCS_DW11_CH0, _VLV_PCS_DW11_CH1)
 diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
 index 4c54930..9cbd702 100644
 --- a/drivers/gpu/drm/i915/intel_dp.c
 +++ b/drivers/gpu/drm/i915/intel_dp.c
 @@ -2346,7 +2346,13 @@ static uint32_t intel_chv_signal_levels(struct 
 intel_dp *intel_dp)
   mutex_lock(dev_priv-dpio_lock);
  
   /* Clear calc init */
 - vlv_dpio_write(dev_priv, pipe, CHV_PCS_DW10(ch), 0);
 + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
 + val = ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
 + vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val);
 +
 + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch));
 + val = ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
 + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
  
   /* Program swing deemph */
   for (i = 0; i  4; i++) {
 @@ -2397,8 +2403,13 @@ static uint32_t intel_chv_signal_levels(struct 
 intel_dp *intel_dp)
   }
  
   /* Start swing calculation */
 - vlv_dpio_write(dev_priv, pipe, CHV_PCS_DW10(ch),
 - (DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3));
 + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
 + val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3;
 + vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val);
 +
 + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch));
 + val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3;
 + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
  
   /* LRC Bypass */
   val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30);
 diff --git a/drivers/gpu/drm/i915/intel_hdmi.c 
 b/drivers/gpu/drm/i915/intel_hdmi.c
 index e912554..d2b1186 100644
 --- a/drivers/gpu/drm/i915/intel_hdmi.c
 +++ b/drivers/gpu/drm/i915/intel_hdmi.c
 @@ -1283,7 +1283,13 @@ static void chv_hdmi_pre_enable(struct intel_encoder 
 *encoder)
   /* FIXME: Fix up value only after power analysis */
  
   /* Clear calc init */
 - vlv_dpio_write(dev_priv, pipe, CHV_PCS_DW10(ch), 0);
 + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
 + val = ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
 + vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val);
 +
 + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch));
 + val = ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3);
 + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
  
   /* FIXME: Program the support xxx V-dB */
   /* Use 800mV-0dB */
 @@ -1322,9 +1328,13 @@ static void chv_hdmi_pre_enable(struct intel_encoder 
 *encoder)
   (0x9a  DPIO_UNIQ_TRANS_SCALE_SHIFT));
  #endif
   /* Start swing calculation */
 - vlv_dpio_write(dev_priv, pipe, CHV_PCS_DW10(ch),
 - DPIO_PCS_SWING_CALC_TX0_TX2 |
 - 

[Intel-gfx] [PATCH v6] drm/i915/vlv: WA for Turbo and RC6 to work together.

2014-04-28 Thread deepak . s
From: Deepak S deepa...@linux.intel.com

With RC6 enabled, BYT has an HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
on increasing/decreasing the freq. This logic monitors the C0
counters of the render/media power-wells over the EI period and takes
the necessary action based on these values.

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Added frequency change acceleration support and remove kernel-doc
style comments. (Ville)

v6: Updated comment section and Fix w/a comment. (Ville)

Signed-off-by: Deepak S deepa...@linux.intel.com
Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |  15 +
 drivers/gpu/drm/i915/i915_irq.c | 133 +++-
 drivers/gpu/drm/i915/i915_reg.h |  11 
 drivers/gpu/drm/i915/intel_pm.c |  12 +++-
 4 files changed, 167 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6136aab..5251946 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -819,6 +819,12 @@ struct i915_suspend_saved_registers {
u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {
+   u32 cz_ts_ei;
+   u32 render_ei_c0;
+   u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
/* work and pm_iir are protected by dev_priv-irq_lock */
struct work_struct work;
@@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
u8 rp1_freq;/* less than RP0 power/freqency */
u8 rp0_freq;/* Non-overclocked max frequency. */
 
+   u32 ei_interrupt_count;
+
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1414,6 +1422,13 @@ struct drm_i915_private {
/* gen6+ rps state */
struct intel_gen6_power_mgmt rps;
 
+   /* rps wa up ei calculation */
+   struct intel_rps_ei_calc rps_up_ei;
+
+   /* rps wa down ei calculation */
+   struct intel_rps_ei_calc rps_down_ei;
+
+
/* ilk-only ips/rps state. Everything in here is protected by the global
 * mchdev_lock in intel_pm.c */
struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2446e61..7d2efc8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1116,6 +1116,131 @@ static void notify_ring(struct drm_device *dev,
i915_queue_hangcheck(dev);
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+   struct  intel_rps_ei_calc *rps_ei)
+{
+   u32 cz_ts, cz_freq_khz;
+   u32 render_count, media_count;
+   u32 elapsed_render, elapsed_media, elapsed_time;
+   u32 residency = 0;
+
+   cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+   cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv-mem_freq * 1000, 4);
+
+   render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+   media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+   if (rps_ei-cz_ts_ei == 0) {
+   rps_ei-cz_ts_ei = cz_ts;
+   rps_ei-render_ei_c0 = render_count;
+   rps_ei-media_ei_c0 = media_count;
+
+   return dev_priv-rps.cur_freq;
+   }
+
+   elapsed_time = cz_ts - rps_ei-cz_ts_ei;
+   rps_ei-cz_ts_ei = cz_ts;
+
+   elapsed_render = render_count - rps_ei-render_ei_c0;
+   rps_ei-render_ei_c0 = render_count;
+
+   elapsed_media = media_count - rps_ei-media_ei_c0;
+   rps_ei-media_ei_c0 = media_count;
+
+   /* Convert all the counters into common unit of milli sec */
+   elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+   elapsed_render /=  cz_freq_khz;
+   elapsed_media /= cz_freq_khz;
+
+   /*
+* Calculate overall C0 residency percentage
+* only if elapsed time is non zero
+*/
+   if (elapsed_time) {
+   residency =
+   ((max(elapsed_render, elapsed_media) * 100)
+   / elapsed_time);
+   }
+
+   return residency;
+}
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render  media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+   u32 residency_C0_up = 0, residency_C0_down = 0;
+   u8 new_delay, adj;
+
+   dev_priv-rps.ei_interrupt_count++;
+
+   WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock));
+
+
+   if (dev_priv-rps_up_ei.cz_ts_ei == 0) {
+   vlv_c0_residency(dev_priv, dev_priv-rps_up_ei);
+   vlv_c0_residency(dev_priv, dev_priv-rps_down_ei);
+   return dev_priv-rps.cur_freq;
+   }
+
+
+   /*
+* To down throttle, C0 residency should be less than down 

[Intel-gfx] [PATCH] tests/gem_exec_params: One more invalid ring tests

2014-04-28 Thread Daniel Vetter
With the vebox 2 patches the number of internal rings doesn't match the
number of exposed rings. So add another subtest with an invalid ring
which should be invalid both internally and externally. The bug this
will catch is using the ring structure before validation, which the
old invalid-ring test won't be able to catch due to the internal vebox2 ring.

Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch
---
 tests/gem_exec_params.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c
index 306039c244e3..769969d3fe56 100644
--- a/tests/gem_exec_params.c
+++ b/tests/gem_exec_params.c
@@ -117,6 +117,11 @@ igt_main
RUN_FAIL(EINVAL);
}
igt_subtest(invalid-ring) {
+   execbuf.flags = I915_EXEC_RING_MASK;
+   RUN_FAIL(EINVAL);
+   }
+
+   igt_subtest(invalid-ring2) {
execbuf.flags = LOCAL_I915_EXEC_VEBOX+1;
RUN_FAIL(EINVAL);
}
-- 
1.8.1.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 19/71] drm/i915/chv: Trigger phy common lane reset

2014-04-28 Thread Imre Deak
On Wed, 2014-04-09 at 13:28 +0300, ville.syrj...@linux.intel.com wrote:
 From: Chon Ming Lee chon.ming@intel.com
 
 During cold boot, the display controller needs to deassert the common
 lane reset.  Only do it once during intel_init_dpio for both PHYx2 and
 PHYx1.
 
 Besides, assert the common lane reset when disabling the PLL.  It is still
 to be determined whether the driver needs to do this.
 
 Signed-off-by: Chon Ming Lee chon.ming@intel.com
 [vsyrjala: Don't disable DPIO PLL when using DSI]
 [vsyrjala: Don't call vlv_disable_pll() by accident on CHV]
 Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
 ---
  drivers/gpu/drm/i915/i915_reg.h  |  8 +
  drivers/gpu/drm/i915/intel_display.c | 66 
 
  2 files changed, 59 insertions(+), 15 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
 index 8aea092..8fcf4ea 100644
 --- a/drivers/gpu/drm/i915/i915_reg.h
 +++ b/drivers/gpu/drm/i915/i915_reg.h
 @@ -1391,6 +1391,14 @@ enum punit_power_well {
  /* Additional CHV pll/phy registers */
  #define DPIO_PHY_STATUS  (VLV_DISPLAY_BASE + 0x6240)
  #define   DPLL_PORTD_READY_MASK  (0xf)
 +#define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100)
 +#define   PHY_COM_LANE_RESET_DEASSERT(phy, val) \
 + ((phy == DPIO_PHY0) ? (val | 1) : (val | 2))
 +#define   PHY_COM_LANE_RESET_ASSERT(phy, val) \
 + ((phy == DPIO_PHY0) ? (val  ~1) : (val  ~2))
 +#define DISPLAY_PHY_STATUS (VLV_DISPLAY_BASE + 0x60104)
 +#define   PHY_POWERGOOD(phy) ((phy == DPIO_PHY0) ? (131) : (130))
 +
  /*
   * The i830 generation, in LVDS mode, defines P1 as the bit number set within
   * this field (only one bit may be set).
 diff --git a/drivers/gpu/drm/i915/intel_display.c 
 b/drivers/gpu/drm/i915/intel_display.c
 index 153f244..e33667d 100644
 --- a/drivers/gpu/drm/i915/intel_display.c
 +++ b/drivers/gpu/drm/i915/intel_display.c
 @@ -1395,17 +1395,36 @@ static void intel_reset_dpio(struct drm_device *dev)
  DPLL_REFA_CLK_ENABLE_VLV |
  DPLL_INTEGRATED_CRI_CLK_VLV);
  
 - /*
 -  * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx -
 -  *  6.  De-assert cmn_reset/side_reset. Same as VLV X0.
 -  *   a. GUnit 0x2110 bit[0] set to 1 (def 0)
 -  *   b. The other bits such as sfr settings / modesel may all be set
 -  *  to 0.

This is VLV specific, so ok to be moved,

 -  *
 -  * This should only be done on init and resume from S3 with both
 -  * PLLs disabled, or we risk losing DPIO and PLL synchronization.
 -  */

but this is also true for CHV, so should stay.

 - I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST);
 + if (IS_CHERRYVIEW(dev)) {
 + enum dpio_phy phy;
 + u32 val;
 +
 + for (phy = DPIO_PHY0; phy  I915_NUM_PHYS_VLV; phy++) {
 + /* Poll for phypwrgood signal */
 + if (wait_for(I915_READ(DISPLAY_PHY_STATUS) 
 + PHY_POWERGOOD(phy), 1))
 + DRM_ERROR(Display PHY %d is not power up\n, 
 phy);
 +
 + /* Deassert common lane reset for PHY*/
 + val = I915_READ(DISPLAY_PHY_CONTROL);
 + I915_WRITE(DISPLAY_PHY_CONTROL,
 + PHY_COM_LANE_RESET_DEASSERT(phy, val));

Would be clearer not to hide the 'or' in the macro and let
PHY_COM_LANE_RESET_DEASSERT be just the flag itself and do here
I915_WRITE(DISPLAY_PHY_CONTROL, val | PHY_COM_LANE_RESET_DEASSERT(phy));

The above issues are minor, so even without fixing them this patch is
Reviewed-by: Imre Deak imre.d...@intel.com

 + }
 +
 + } else {
 + /*
 +  * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx -
 +  *  6.  De-assert cmn_reset/side_reset. Same as VLV X0.
 +  *   a. GUnit 0x2110 bit[0] set to 1 (def 0)
 +  *   b. The other bits such as sfr settings / modesel may all
 +  *  be set to 0.
 +  *
 +  * This should only be done on init and resume from S3 with
 +  * both PLLs disabled, or we risk losing DPIO and PLL
 +  * synchronization.
 +  */
 + I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST);
 + }
  }
  
  static void vlv_enable_pll(struct intel_crtc *crtc)
 @@ -1529,6 +1548,19 @@ static void vlv_disable_pll(struct drm_i915_private 
 *dev_priv, enum pipe pipe)
   val = DPLL_INTEGRATED_CRI_CLK_VLV | DPLL_REFA_CLK_ENABLE_VLV;
   I915_WRITE(DPLL(pipe), val);
   POSTING_READ(DPLL(pipe));
 +
 +}
 +
 +static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe 
 pipe)
 +{
 + int dpll = DPLL(pipe);
 + u32 val;
 +
 + /* Set PLL en = 0 */
 + val = I915_READ(dpll);
 + val = 

Re: [Intel-gfx] [PATCH v3] drm/i915: Add boot paramter to control rps boost at boot time.

2014-04-28 Thread Chris Wilson
On Mon, Apr 28, 2014 at 08:17:04PM +0530, deepa...@linux.intel.com wrote:
 From: Deepak S deepa...@linux.intel.com
 
 We are adding a module parameter to control rps boost. By default, we
 enable the boost for better performance. Based on the need (perf/power)
 we can either enable/disable.
 
 v2: Addressed rps default comment (Jani)
 
 v3: Use bool to represent the boot parameter (Ville).
 
 Signed-off-by: Deepak S deepa...@linux.intel.com
 Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com
 ---
  drivers/gpu/drm/i915/i915_drv.h| 1 +
  drivers/gpu/drm/i915/i915_gem.c| 2 +-
  drivers/gpu/drm/i915/i915_params.c | 5 +
  3 files changed, 7 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index e81feab..6136aab 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -1945,6 +1945,7 @@ struct i915_params {
   bool reset;
   bool disable_display;
   bool disable_vtd_wa;
 + bool enable_rps_boost;
  };
  extern struct i915_params i915 __read_mostly;
  
 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
 index b00a77e..f2b3262 100644
 --- a/drivers/gpu/drm/i915/i915_gem.c
 +++ b/drivers/gpu/drm/i915/i915_gem.c
 @@ -1049,7 +1049,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, 
 u32 seqno,
  
   timeout_expire = timeout ? jiffies + 
 timespec_to_jiffies_timeout(timeout) : 0;
  
 - if (INTEL_INFO(dev)-gen = 6  can_wait_boost(file_priv)) {
 + if (INTEL_INFO(dev)-gen = 6  can_wait_boost(file_priv)  
 i915.enable_rps_boost) {

The separate INTEL_INFO was because this used to be a neat
dev_priv-info.gen dereference (and dev used to not be derivable from
file_priv, which itself may be NULL here), but please don't add another
predicate that means can_wait_boost().
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC] drm/i915: Add variable gem object size support to i915

2014-04-28 Thread arun . siluvery
From: Siluvery, Arun arun.siluv...@intel.com

This patch adds support to have gem objects of variable size.
The size of the gem object obj-size is always constant and this fact
is tightly coupled in the driver; this implementation allows to vary
its effective size using an interface similar to fallocate().

A new ioctl() is introduced to mark a range as scratch/usable.
Once marked as scratch, associated backing store is released and the
region is filled with scratch pages. The region can also be unmarked
at a later point in which case new backing pages are created.
The range can be anywhere within the object space, it can have multiple
ranges possibly overlapping forming a large contiguous range.

There is only a single scratch page and the kernel allows writes to this
page; userspace needs to keep track of the scratch page range, otherwise any
subsequent writes to these pages will overwrite previous content.

This feature is useful where the exact size of the object is not clear
at the time of its creation, in such case we usually create an object
with more than the required size but end up using it partially.
In devices where there are tight memory constraints it would be useful
to release that additional space which is currently unused. Using this
interface the region can be simply marked as scratch which releases
its backing store thus reducing the memory pressure on the kernel.

Many thanks to Daniel, ChrisW, Tvrtko, Bob for the idea and feedback
on this implementation.

v2: fix holes in error handling and use consistent data types (Tvrtko)
 - If page allocation fails simply return error; do not try to invoke
   shrinker to free backing store.
 - Release new pages created by us in case of error during page allocation
   or sg_table update.
 - Use 64-bit data types for start and length values to avoid truncation.

Change-Id: Id3339be95dbb6b5c69c39d751986c40ec0ccdaf8
Signed-off-by: Siluvery, Arun arun.siluv...@intel.com
---

Please let me know if I need to submit this as PATCH instead of RFC.
Since this is RFC I have included all changes as a single patch.

 drivers/gpu/drm/i915/i915_dma.c |   1 +
 drivers/gpu/drm/i915/i915_drv.h |   2 +
 drivers/gpu/drm/i915/i915_gem.c | 205 
 include/uapi/drm/i915_drm.h |  31 ++
 4 files changed, 239 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 31c499f..3dd4b1a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2000,6 +2000,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, 
DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, \
DRM_UNLOCKED|DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(I915_GEM_FALLOCATE, i915_gem_fallocate_ioctl, 
DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_SET_PLANE_180_ROTATION, \
i915_set_plane_180_rotation, DRM_AUTH | DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(I915_ENABLE_PLANE_RESERVED_REG_BIT_2,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4069800..1f30fb6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2210,6 +2210,8 @@ int i915_gem_get_tiling(struct drm_device *dev, void 
*data,
 int i915_gem_init_userptr(struct drm_device *dev);
 int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+int i915_gem_fallocate_ioctl(struct drm_device *dev, void *data,
+   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6153e01..a0188ee 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -317,6 +317,211 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
   args-size, args-handle);
 }
 
+static int i915_gem_obj_fallocate(struct drm_i915_gem_object *obj,
+ bool mark_scratch, uint64_t start,
+ uint64_t length)
+{
+   int i, j;
+   int ret;
+   uint32_t start_page, end_page;
+   uint32_t page_count;
+   gfp_t gfp;
+   bool update_sg_table = false;
+   unsigned long scratch_pfn;
+   struct page *scratch;
+   struct page **pages;
+   struct sg_table *sg = NULL;
+   struct sg_page_iter sg_iter;
+   struct address_space *mapping;
+   struct drm_i915_private *dev_priv;
+
+   dev_priv = obj-base.dev-dev_private;
+   start_page = start  PAGE_SHIFT;
+   end_page = (start + length)  PAGE_SHIFT;
+   page_count = obj-base.size  

Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cheeryview

2014-04-28 Thread Deepak S


On Monday 28 April 2014 08:15 PM, Daniel Vetter wrote:

On Mon, Apr 28, 2014 at 05:29:46PM +0300, Imre Deak wrote:

+static void cherryview_setup_pctx(struct drm_device *dev)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   unsigned long pctx_paddr;
+   struct i915_gtt *gtt = dev_priv-gtt;
+   u32 pcbr;
+   int pctx_size = 32*1024;
+
+   pcbr = I915_READ(VLV_PCBR);
+   if ((pcbr  VLV_PCBR_ADDR_SHIFT) == 0) {
+   /*
+* From the Gunit register HAS:
+* The Gfx driver is expected to program this register and 
ensure
+* proper allocation within Gfx stolen memory.  For example, 
this
+* register should be programmed such than the PCBR range does 
not
+* overlap with other relevant ranges.
+*/
+   pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - 
pctx_size);

This area should be reserved.

We've had a really lengthy discussion internally about the bios-reserved
chunk in stolen. It was stalled due to (imo unjustified) fear to leak
information what the bios actually uses this for.

If we need to reserve more of stolen than we currently do we need to pick
up that approach again instead of adding more bandaids.
-Daniel


Agreed. Will change accordingly.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cheeryview

2014-04-28 Thread Deepak S

Thanks for the review. I will address the comments

On Saturday 26 April 2014 03:12 AM, Ben Widawsky wrote:

On Mon, Apr 21, 2014 at 01:34:07PM +0530, deepa...@linux.intel.com wrote:

From: Deepak S deepa...@linux.intel.com

v2: Configure PCBR if BIOS fails to allocate pcbr (deepak)

v3: Fix PCBR condition check during CHV RC6 Enable flag set

Signed-off-by: Deepak S deepa...@linux.intel.com
---
  drivers/gpu/drm/i915/i915_reg.h |   1 +
  drivers/gpu/drm/i915/intel_pm.c | 100 +++-
  2 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b951d61..7090b42 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5134,6 +5134,7 @@ enum punit_power_well {
  #define GEN6_GT_GFX_RC6   0x138108
  #define GEN6_GT_GFX_RC6p  0x13810C
  #define GEN6_GT_GFX_RC6pp 0x138110
+#define VLV_PCBR_ADDR_SHIFT12
  
  #define GEN6_PCODE_MAILBOX			0x138124

  #define   GEN6_PCODE_READY(131)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f3c5bce..421a4cc 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3264,6 +3264,18 @@ static void gen6_disable_rps(struct drm_device *dev)
gen6_disable_rps_interrupts(dev);
  }
  
+static void cherryview_disable_rps(struct drm_device *dev)

+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+
+   I915_WRITE(GEN6_RC_CONTROL, 0);
+
+   if (dev_priv-vlv_pctx) {
+   drm_gem_object_unreference(dev_priv-vlv_pctx-base);
+   dev_priv-vlv_pctx = NULL;
+   }
+}
+
  static void valleyview_disable_rps(struct drm_device *dev)
  {
struct drm_i915_private *dev_priv = dev-dev_private;
@@ -3642,6 +3654,28 @@ static void valleyview_check_pctx(struct 
drm_i915_private *dev_priv)
 dev_priv-vlv_pctx-stolen-start);
  }
  
+static void cherryview_setup_pctx(struct drm_device *dev)

+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   unsigned long pctx_paddr;
+   struct i915_gtt *gtt = dev_priv-gtt;
+   u32 pcbr;
+   int pctx_size = 32*1024;
+
+   pcbr = I915_READ(VLV_PCBR);
+   if ((pcbr  VLV_PCBR_ADDR_SHIFT) == 0) {
+   /*
+* From the Gunit register HAS:
+* The Gfx driver is expected to program this register and 
ensure
+* proper allocation within Gfx stolen memory.  For example, 
this
+* register should be programmed such than the PCBR range does 
not
+* overlap with other relevant ranges.
+*/
+   pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - 
pctx_size);
+   I915_WRITE(VLV_PCBR, pctx_paddr);
+   }
+}
+

Is there a reason we did not follow the same idioms as Valleyview?
Shouldn't we be building a stolen object like we do there, and then
using that?

Furthermore, we need to make sure we make the stolen allocator aware for
the case where pcbr is not zero, like we do for valleyview.

I think the best solution here is to try to combine the valleyview and
cherryview logic for this function. Extract out size, and most of the
rest looks pretty similar.

For enabling, I am fine with it as is though provided it's hidden by
preliminary flag.


  static void valleyview_setup_pctx(struct drm_device *dev)
  {
struct drm_i915_private *dev_priv = dev-dev_private;
@@ -3697,6 +3731,61 @@ static void valleyview_cleanup_pctx(struct drm_device 
*dev)
dev_priv-vlv_pctx = NULL;
  }
  
+static void cherryview_enable_rps(struct drm_device *dev)

+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_ring_buffer *ring;
+   u32 gtfifodbg, rc6_mode = 0, pcbr;
+   int i;
+
+   WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock));
+
+   if ((gtfifodbg = I915_READ(GTFIFODBG))) {
+   DRM_DEBUG_DRIVER(GT fifo had a previous error %x\n,
+gtfifodbg);
+   I915_WRITE(GTFIFODBG, gtfifodbg);
+   }
+
+   cherryview_setup_pctx(dev);
+
+   /* 1a  1b: Get forcewake during program sequence. Although the driver
+* hasn't enabled a state yet where we need forcewake, BIOS may have.*/
+   gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
+   /* 2a: Program RC6 thresholds.*/
+   I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40  16);
+   I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+   I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+
+   for_each_ring(ring, dev_priv, i)
+   I915_WRITE(RING_MAX_IDLE(ring-mmio_base), 10);
+
+   I915_WRITE(GEN6_RC6_THRESHOLD, 5); /* 50/125ms per EI */
+
+   /* allows RC6 residency counter to work */
+   

Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cheeryview

2014-04-28 Thread Deepak S

Thanks for the review. I will address the comments


On Monday 28 April 2014 07:59 PM, Imre Deak wrote:

On Mon, 2014-04-21 at 13:34 +0530, deepa...@linux.intel.com wrote:

From: Deepak S deepa...@linux.intel.com

v2: Configure PCBR if BIOS fails to allocate pcbr (deepak)

v3: Fix PCBR condition check during CHV RC6 Enable flag set

Signed-off-by: Deepak S deepa...@linux.intel.com
---
  drivers/gpu/drm/i915/i915_reg.h |   1 +
  drivers/gpu/drm/i915/intel_pm.c | 100 +++-
  2 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b951d61..7090b42 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5134,6 +5134,7 @@ enum punit_power_well {
  #define GEN6_GT_GFX_RC6   0x138108
  #define GEN6_GT_GFX_RC6p  0x13810C
  #define GEN6_GT_GFX_RC6pp 0x138110
+#define VLV_PCBR_ADDR_SHIFT12
  
  #define GEN6_PCODE_MAILBOX			0x138124

  #define   GEN6_PCODE_READY(131)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f3c5bce..421a4cc 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3264,6 +3264,18 @@ static void gen6_disable_rps(struct drm_device *dev)
gen6_disable_rps_interrupts(dev);
  }
  
+static void cherryview_disable_rps(struct drm_device *dev)

+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+
+   I915_WRITE(GEN6_RC_CONTROL, 0);
+
+   if (dev_priv-vlv_pctx) {
+   drm_gem_object_unreference(dev_priv-vlv_pctx-base);
+   dev_priv-vlv_pctx = NULL;
+   }
+}
+
  static void valleyview_disable_rps(struct drm_device *dev)
  {
struct drm_i915_private *dev_priv = dev-dev_private;
@@ -3642,6 +3654,28 @@ static void valleyview_check_pctx(struct 
drm_i915_private *dev_priv)
 dev_priv-vlv_pctx-stolen-start);
  }
  
+static void cherryview_setup_pctx(struct drm_device *dev)

+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   unsigned long pctx_paddr;
+   struct i915_gtt *gtt = dev_priv-gtt;
+   u32 pcbr;
+   int pctx_size = 32*1024;
+
+   pcbr = I915_READ(VLV_PCBR);
+   if ((pcbr  VLV_PCBR_ADDR_SHIFT) == 0) {
+   /*
+* From the Gunit register HAS:
+* The Gfx driver is expected to program this register and 
ensure
+* proper allocation within Gfx stolen memory.  For example, 
this
+* register should be programmed such than the PCBR range does 
not
+* overlap with other relevant ranges.
+*/
+   pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - 
pctx_size);

This area should be reserved.


+   I915_WRITE(VLV_PCBR, pctx_paddr);
+   }
+}
+
  static void valleyview_setup_pctx(struct drm_device *dev)
  {
struct drm_i915_private *dev_priv = dev-dev_private;
@@ -3697,6 +3731,61 @@ static void valleyview_cleanup_pctx(struct drm_device 
*dev)
dev_priv-vlv_pctx = NULL;
  }
  
+static void cherryview_enable_rps(struct drm_device *dev)

+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_ring_buffer *ring;
+   u32 gtfifodbg, rc6_mode = 0, pcbr;
+   int i;
+
+   WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock));
+
+   if ((gtfifodbg = I915_READ(GTFIFODBG))) {
+   DRM_DEBUG_DRIVER(GT fifo had a previous error %x\n,
+gtfifodbg);
+   I915_WRITE(GTFIFODBG, gtfifodbg);
+   }
+
+   cherryview_setup_pctx(dev);

This should be called from intel_init_gt_powersave().


+
+   /* 1a  1b: Get forcewake during program sequence. Although the driver
+* hasn't enabled a state yet where we need forcewake, BIOS may have.*/
+   gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
+   /* 2a: Program RC6 thresholds.*/
+   I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40  16);
+   I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+   I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+
+   for_each_ring(ring, dev_priv, i)
+   I915_WRITE(RING_MAX_IDLE(ring-mmio_base), 10);
+
+   I915_WRITE(GEN6_RC6_THRESHOLD, 5); /* 50/125ms per EI */
+
+   /* allows RC6 residency counter to work */
+   I915_WRITE(VLV_COUNTER_CONTROL,
+  _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+ VLV_MEDIA_RC6_COUNT_EN |
+ VLV_RENDER_RC6_COUNT_EN));
+
+   /* Todo: If BIOS has not configured PCBR
+*   then allocate in BIOS Reserved */
+
+   /* For now we assume BIOS is allocating and populating the PCBR  */
+   pcbr = I915_READ(VLV_PCBR);
+
+   DRM_DEBUG_DRIVER(PCBR offset : 

Re: [Intel-gfx] [PATCH v3] drm/i915: Add boot paramter to control rps boost at boot time.

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 4:47 PM,  deepa...@linux.intel.com wrote:
 From: Deepak S deepa...@linux.intel.com

 We are adding a module parameter to control rps boost. By default, we
 enable the boost for better performance. Based on the need (perf/power)
 we can either enable/disable.

 v2: Addressed rps default comment (Jani)

 v3: Use bool to represent the boot parameter (Ville).

 Signed-off-by: Deepak S deepa...@linux.intel.com
 Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com

I'm still unhappy about this since it feels like cheating in
benchmarks and it gives me the impression that you guys frob this at
runtime on Android ;-)

A few more ideas:
1. light-boost: We add some hysteresis (either time or whether we're
still above rpe or something like that) and don't boost if this is the
case. I expect that we won't be able to have the full boost benefits
without the downside.

2. eco-boost. We try to boost just enough to not miss the next
frame. For that the app needs to tell us (with two new execbuf flags)
whether it hit or missed the last deadline. Once an app used those
flags for the first time we decrease the boost target freq once per
HIT_DEADLINE and until we get the first MISS_DEADLINE. Then we only try
to sporadically test the limit again. TCP flow control theory might be
interesting for copying ideas.

3. runtime-boost-control. The workloads with very predictable
regular loads seem to be known. We can just add a new execbuf NO_BOOST
flag which libva uses on all execbufs but the first one (since we
don't want to drop the first frame really).

Approach 3 should be the simplest to implement and also the simplest
to demonstrate in the open-source libva (since that's always a merge
criterion).

Aside: If you really use this at runtime then you essentially create a
new ABI with this patch. Which means we need open-source userspace for
it anyway.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser

2014-04-28 Thread bradley . d . volkin
From: Brad Volkin bradley.d.vol...@intel.com

For clients that submit large batch buffers the command parser has
a substantial impact on performance. On my HSW ULT system performance
drops as much as ~20% on some tests. Most of the time is spent in the
command lookup code. Converting that from the current naive search to
a hash table lookup reduces the performance impact by as much as ~10%.

The choice of value for I915_CMD_HASH_ORDER allows all commands
currently used in the parser tables to hash to their own bucket (except
for one collision on the render ring). The tradeoff is that it wastes
memory. Because the opcodes for the commands in the tables are not
particularly well distributed, reducing the order still leaves many
buckets empty. The increased collisions don't seem to have a huge
impact on the performance gain, but for now anyhow, the parser trades
memory for performance.

Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
---
 drivers/gpu/drm/i915/i915_cmd_parser.c  | 136 
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |  11 ++-
 4 files changed, 116 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 9bac097..9dca899 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
 }
 
-static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
+static bool validate_cmds_sorted(struct intel_ring_buffer *ring,
+const struct drm_i915_cmd_table *cmd_tables,
+int cmd_table_count)
 {
int i;
bool ret = true;
 
-   if (!ring-cmd_tables || ring-cmd_table_count == 0)
+   if (!cmd_tables || cmd_table_count == 0)
return true;
 
-   for (i = 0; i  ring-cmd_table_count; i++) {
-   const struct drm_i915_cmd_table *table = ring-cmd_tables[i];
+   for (i = 0; i  cmd_table_count; i++) {
+   const struct drm_i915_cmd_table *table = cmd_tables[i];
u32 previous = 0;
int j;
 
@@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer 
*ring)
 ring-master_reg_count);
 }
 
+struct cmd_node {
+   const struct drm_i915_cmd_descriptor *desc;
+   struct hlist_node node;
+};
+
+/*
+ * Different command ranges have different numbers of bits for the opcode.
+ * In order to use the opcode bits, and only the opcode bits, for the hash key
+ * we should use the MI_* command opcode mask (since those commands use the
+ * fewest bits for the opcode.)
+ */
+#define CMD_HASH_MASK STD_MI_OPCODE_MASK
+
+static int init_hash_table(struct intel_ring_buffer *ring,
+  const struct drm_i915_cmd_table *cmd_tables,
+  int cmd_table_count)
+{
+   int i, j;
+
+   hash_init(ring-cmd_hash);
+
+   for (i = 0; i  cmd_table_count; i++) {
+   const struct drm_i915_cmd_table *table = cmd_tables[i];
+
+   for (j = 0; j  table-count; j++) {
+   const struct drm_i915_cmd_descriptor *desc =
+   table-table[j];
+   struct cmd_node *desc_node =
+   kmalloc(sizeof(*desc_node), GFP_KERNEL);
+
+   if (!desc_node)
+   return -ENOMEM;
+
+   desc_node-desc = desc;
+   hash_add(ring-cmd_hash, desc_node-node,
+desc-cmd.value  CMD_HASH_MASK);
+   }
+   }
+
+   return 0;
+}
+
+static void fini_hash_table(struct intel_ring_buffer *ring)
+{
+   struct hlist_node *tmp;
+   struct cmd_node *desc_node;
+   int i;
+
+   hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) {
+   hash_del(desc_node-node);
+   kfree(desc_node);
+   }
+}
+
 /**
  * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer
  * @ring: the ringbuffer to initialize
@@ -567,18 +623,21 @@ static bool validate_regs_sorted(struct intel_ring_buffer 
*ring)
  */
 void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring)
 {
+   const struct drm_i915_cmd_table *cmd_tables;
+   int cmd_table_count;
+
if (!IS_GEN7(ring-dev))
return;
 
switch (ring-id) {
case RCS:
if (IS_HASWELL(ring-dev)) {
-   ring-cmd_tables = hsw_render_ring_cmds;
-   ring-cmd_table_count =
+   cmd_tables = hsw_render_ring_cmds;
+   cmd_table_count =
ARRAY_SIZE(hsw_render_ring_cmds);
} else {
- 

Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote:
 From: Brad Volkin bradley.d.vol...@intel.com
 
 For clients that submit large batch buffers the command parser has
 a substantial impact on performance. On my HSW ULT system performance
 drops as much as ~20% on some tests. Most of the time is spent in the
 command lookup code. Converting that from the current naive search to
 a hash table lookup reduces the performance impact by as much as ~10%.
 
 The choice of value for I915_CMD_HASH_ORDER allows all commands
 currently used in the parser tables to hash to their own bucket (except
 for one collision on the render ring). The tradeoff is that it wastes
 memory. Because the opcodes for the commands in the tables are not
 particularly well distributed, reducing the order still leaves many
 buckets empty. The increased collisions don't seem to have a huge
 impact on the performance gain, but for now anyhow, the parser trades
 memory for performance.
 
 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com

Nice. One idea on top which could be worth a shot is a bloomfilter to
handle all the non-special cases without a (likely) cache miss in the
hashtable. The per-ring bloomfilter would be only loaded once (and if we
place it nearby other stuff the cmdparser needs anyway even that is
amortized).

Also Chris mentioned that blitter loads under X are about the worst case
wrt impact of the cmdparser. Benchmarking x11perf might be a useful
extreme testcase for optimizing this. I guess Chris will jump in with more
ideas?

Thanks, Daniel

 ---
  drivers/gpu/drm/i915/i915_cmd_parser.c  | 136 
 
  drivers/gpu/drm/i915/i915_drv.h |   1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c |   2 +
  drivers/gpu/drm/i915/intel_ringbuffer.h |  11 ++-
  4 files changed, 116 insertions(+), 34 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
 b/drivers/gpu/drm/i915/i915_cmd_parser.c
 index 9bac097..9dca899 100644
 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
 +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
 @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
   return 0;
  }
  
 -static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
 +static bool validate_cmds_sorted(struct intel_ring_buffer *ring,
 +  const struct drm_i915_cmd_table *cmd_tables,
 +  int cmd_table_count)
  {
   int i;
   bool ret = true;
  
 - if (!ring-cmd_tables || ring-cmd_table_count == 0)
 + if (!cmd_tables || cmd_table_count == 0)
   return true;
  
 - for (i = 0; i  ring-cmd_table_count; i++) {
 - const struct drm_i915_cmd_table *table = ring-cmd_tables[i];
 + for (i = 0; i  cmd_table_count; i++) {
 + const struct drm_i915_cmd_table *table = cmd_tables[i];
   u32 previous = 0;
   int j;
  
 @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct 
 intel_ring_buffer *ring)
ring-master_reg_count);
  }
  
 +struct cmd_node {
 + const struct drm_i915_cmd_descriptor *desc;
 + struct hlist_node node;
 +};
 +
 +/*
 + * Different command ranges have different numbers of bits for the opcode.
 + * In order to use the opcode bits, and only the opcode bits, for the hash 
 key
 + * we should use the MI_* command opcode mask (since those commands use the
 + * fewest bits for the opcode.)
 + */
 +#define CMD_HASH_MASK STD_MI_OPCODE_MASK
 +
 +static int init_hash_table(struct intel_ring_buffer *ring,
 +const struct drm_i915_cmd_table *cmd_tables,
 +int cmd_table_count)
 +{
 + int i, j;
 +
 + hash_init(ring-cmd_hash);
 +
 + for (i = 0; i  cmd_table_count; i++) {
 + const struct drm_i915_cmd_table *table = cmd_tables[i];
 +
 + for (j = 0; j  table-count; j++) {
 + const struct drm_i915_cmd_descriptor *desc =
 + table-table[j];
 + struct cmd_node *desc_node =
 + kmalloc(sizeof(*desc_node), GFP_KERNEL);
 +
 + if (!desc_node)
 + return -ENOMEM;
 +
 + desc_node-desc = desc;
 + hash_add(ring-cmd_hash, desc_node-node,
 +  desc-cmd.value  CMD_HASH_MASK);
 + }
 + }
 +
 + return 0;
 +}
 +
 +static void fini_hash_table(struct intel_ring_buffer *ring)
 +{
 + struct hlist_node *tmp;
 + struct cmd_node *desc_node;
 + int i;
 +
 + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) {
 + hash_del(desc_node-node);
 + kfree(desc_node);
 + }
 +}
 +
  /**
   * i915_cmd_parser_init_ring() - set cmd parser related fields for a 
 ringbuffer
   * @ring: the ringbuffer to initialize
 @@ -567,18 +623,21 @@ static bool 

Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser

2014-04-28 Thread Volkin, Bradley D
On Mon, Apr 28, 2014 at 08:22:08AM -0700, Volkin, Bradley D wrote:
 From: Brad Volkin bradley.d.vol...@intel.com
 
 For clients that submit large batch buffers the command parser has
 a substantial impact on performance. On my HSW ULT system performance
 drops as much as ~20% on some tests. Most of the time is spent in the
 command lookup code. Converting that from the current naive search to
 a hash table lookup reduces the performance impact by as much as ~10%.

Tvrtko pointed out that what I wrote here is a bit ambiguous. To clarify:
Without the patch, perf drops 20%
With the patch, perf drops 10%

Brad

 
 The choice of value for I915_CMD_HASH_ORDER allows all commands
 currently used in the parser tables to hash to their own bucket (except
 for one collision on the render ring). The tradeoff is that it wastes
 memory. Because the opcodes for the commands in the tables are not
 particularly well distributed, reducing the order still leaves many
 buckets empty. The increased collisions don't seem to have a huge
 impact on the performance gain, but for now anyhow, the parser trades
 memory for performance.
 
 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
 ---
  drivers/gpu/drm/i915/i915_cmd_parser.c  | 136 
 
  drivers/gpu/drm/i915/i915_drv.h |   1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c |   2 +
  drivers/gpu/drm/i915/intel_ringbuffer.h |  11 ++-
  4 files changed, 116 insertions(+), 34 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
 b/drivers/gpu/drm/i915/i915_cmd_parser.c
 index 9bac097..9dca899 100644
 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
 +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
 @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
   return 0;
  }
  
 -static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
 +static bool validate_cmds_sorted(struct intel_ring_buffer *ring,
 +  const struct drm_i915_cmd_table *cmd_tables,
 +  int cmd_table_count)
  {
   int i;
   bool ret = true;
  
 - if (!ring-cmd_tables || ring-cmd_table_count == 0)
 + if (!cmd_tables || cmd_table_count == 0)
   return true;
  
 - for (i = 0; i  ring-cmd_table_count; i++) {
 - const struct drm_i915_cmd_table *table = ring-cmd_tables[i];
 + for (i = 0; i  cmd_table_count; i++) {
 + const struct drm_i915_cmd_table *table = cmd_tables[i];
   u32 previous = 0;
   int j;
  
 @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct 
 intel_ring_buffer *ring)
ring-master_reg_count);
  }
  
 +struct cmd_node {
 + const struct drm_i915_cmd_descriptor *desc;
 + struct hlist_node node;
 +};
 +
 +/*
 + * Different command ranges have different numbers of bits for the opcode.
 + * In order to use the opcode bits, and only the opcode bits, for the hash 
 key
 + * we should use the MI_* command opcode mask (since those commands use the
 + * fewest bits for the opcode.)
 + */
 +#define CMD_HASH_MASK STD_MI_OPCODE_MASK
 +
 +static int init_hash_table(struct intel_ring_buffer *ring,
 +const struct drm_i915_cmd_table *cmd_tables,
 +int cmd_table_count)
 +{
 + int i, j;
 +
 + hash_init(ring-cmd_hash);
 +
 + for (i = 0; i  cmd_table_count; i++) {
 + const struct drm_i915_cmd_table *table = cmd_tables[i];
 +
 + for (j = 0; j  table-count; j++) {
 + const struct drm_i915_cmd_descriptor *desc =
 + table-table[j];
 + struct cmd_node *desc_node =
 + kmalloc(sizeof(*desc_node), GFP_KERNEL);
 +
 + if (!desc_node)
 + return -ENOMEM;
 +
 + desc_node-desc = desc;
 + hash_add(ring-cmd_hash, desc_node-node,
 +  desc-cmd.value  CMD_HASH_MASK);
 + }
 + }
 +
 + return 0;
 +}
 +
 +static void fini_hash_table(struct intel_ring_buffer *ring)
 +{
 + struct hlist_node *tmp;
 + struct cmd_node *desc_node;
 + int i;
 +
 + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) {
 + hash_del(desc_node-node);
 + kfree(desc_node);
 + }
 +}
 +
  /**
   * i915_cmd_parser_init_ring() - set cmd parser related fields for a 
 ringbuffer
   * @ring: the ringbuffer to initialize
 @@ -567,18 +623,21 @@ static bool validate_regs_sorted(struct 
 intel_ring_buffer *ring)
   */
  void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring)
  {
 + const struct drm_i915_cmd_table *cmd_tables;
 + int cmd_table_count;
 +
   if (!IS_GEN7(ring-dev))
   return;
  
   switch (ring-id) {
   case RCS:
   if (IS_HASWELL(ring-dev)) {
 - ring-cmd_tables = hsw_render_ring_cmds;
 -   

Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote:
 From: Brad Volkin bradley.d.vol...@intel.com
 
 For clients that submit large batch buffers the command parser has
 a substantial impact on performance. On my HSW ULT system performance
 drops as much as ~20% on some tests. Most of the time is spent in the
 command lookup code. Converting that from the current naive search to
 a hash table lookup reduces the performance impact by as much as ~10%.
 
 The choice of value for I915_CMD_HASH_ORDER allows all commands
 currently used in the parser tables to hash to their own bucket (except
 for one collision on the render ring). The tradeoff is that it wastes
 memory. Because the opcodes for the commands in the tables are not
 particularly well distributed, reducing the order still leaves many
 buckets empty. The increased collisions don't seem to have a huge
 impact on the performance gain, but for now anyhow, the parser trades
 memory for performance.

For the collisions have you looked into pre-munging the key a bit so that
we use more bits? A few shifts and xors shouldn't affect perf much really.

Also since the tables are mostly empty we could just overflow to the next
hashtable entry, but unfortunately that would require a bit of custom
insert and lookup code.

Finally if we manage to get 0 collisions a WARN_ON would be good for
that, to make sure we don't accidentally regress.

Anyway just a few more ideas.
-Daniel

Finally if we manage to get 0 collisions a WARN_ON would be good for
that, to make sure we don't accidentally regress.

Anyway just a few more ideas.
-Daniel

 
 Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
 ---
  drivers/gpu/drm/i915/i915_cmd_parser.c  | 136 
 
  drivers/gpu/drm/i915/i915_drv.h |   1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c |   2 +
  drivers/gpu/drm/i915/intel_ringbuffer.h |  11 ++-
  4 files changed, 116 insertions(+), 34 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
 b/drivers/gpu/drm/i915/i915_cmd_parser.c
 index 9bac097..9dca899 100644
 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
 +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
 @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
   return 0;
  }
  
 -static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
 +static bool validate_cmds_sorted(struct intel_ring_buffer *ring,
 +  const struct drm_i915_cmd_table *cmd_tables,
 +  int cmd_table_count)
  {
   int i;
   bool ret = true;
  
 - if (!ring-cmd_tables || ring-cmd_table_count == 0)
 + if (!cmd_tables || cmd_table_count == 0)
   return true;
  
 - for (i = 0; i  ring-cmd_table_count; i++) {
 - const struct drm_i915_cmd_table *table = ring-cmd_tables[i];
 + for (i = 0; i  cmd_table_count; i++) {
 + const struct drm_i915_cmd_table *table = cmd_tables[i];
   u32 previous = 0;
   int j;
  
 @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct 
 intel_ring_buffer *ring)
ring-master_reg_count);
  }
  
 +struct cmd_node {
 + const struct drm_i915_cmd_descriptor *desc;
 + struct hlist_node node;
 +};
 +
 +/*
 + * Different command ranges have different numbers of bits for the opcode.
 + * In order to use the opcode bits, and only the opcode bits, for the hash 
 key
 + * we should use the MI_* command opcode mask (since those commands use the
 + * fewest bits for the opcode.)
 + */
 +#define CMD_HASH_MASK STD_MI_OPCODE_MASK
 +
 +static int init_hash_table(struct intel_ring_buffer *ring,
 +const struct drm_i915_cmd_table *cmd_tables,
 +int cmd_table_count)
 +{
 + int i, j;
 +
 + hash_init(ring-cmd_hash);
 +
 + for (i = 0; i  cmd_table_count; i++) {
 + const struct drm_i915_cmd_table *table = cmd_tables[i];
 +
 + for (j = 0; j  table-count; j++) {
 + const struct drm_i915_cmd_descriptor *desc =
 + table-table[j];
 + struct cmd_node *desc_node =
 + kmalloc(sizeof(*desc_node), GFP_KERNEL);
 +
 + if (!desc_node)
 + return -ENOMEM;
 +
 + desc_node-desc = desc;
 + hash_add(ring-cmd_hash, desc_node-node,
 +  desc-cmd.value  CMD_HASH_MASK);
 + }
 + }
 +
 + return 0;
 +}
 +
 +static void fini_hash_table(struct intel_ring_buffer *ring)
 +{
 + struct hlist_node *tmp;
 + struct cmd_node *desc_node;
 + int i;
 +
 + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) {
 + hash_del(desc_node-node);
 + kfree(desc_node);
 + }
 +}
 +
  /**
   * i915_cmd_parser_init_ring() - set cmd parser related fields for a 
 ringbuffer

Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser

2014-04-28 Thread Volkin, Bradley D
On Mon, Apr 28, 2014 at 08:42:56AM -0700, Daniel Vetter wrote:
 On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote:
  From: Brad Volkin bradley.d.vol...@intel.com
  
  For clients that submit large batch buffers the command parser has
  a substantial impact on performance. On my HSW ULT system performance
  drops as much as ~20% on some tests. Most of the time is spent in the
  command lookup code. Converting that from the current naive search to
  a hash table lookup reduces the performance impact by as much as ~10%.
  
  The choice of value for I915_CMD_HASH_ORDER allows all commands
  currently used in the parser tables to hash to their own bucket (except
  for one collision on the render ring). The tradeoff is that it wastes
  memory. Because the opcodes for the commands in the tables are not
  particularly well distributed, reducing the order still leaves many
  buckets empty. The increased collisions don't seem to have a huge
  impact on the performance gain, but for now anyhow, the parser trades
  memory for performance.
  
  Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
 
 Nice. One idea on top which could be worth a shot is a bloomfilter to
 handle all the non-special cases without a (likely) cache miss in the
 hashtable. The per-ring bloomfilter would be only loaded once (and if we
 place it nearby other stuff the cmdparser needs anyway even that is
 amortized).

Good suggestion. Noted.

 
 Also Chris mentioned that blitter loads under X are about the worst case
 wrt impact of the cmdparser. Benchmarking x11perf might be a useful
 extreme testcase for optimizing this. I guess Chris will jump in with more
 ideas?

Ok, I'll see how x11perf looks with and without this patch as a starting
point.

Brad

 
 Thanks, Daniel
 
  ---
   drivers/gpu/drm/i915/i915_cmd_parser.c  | 136 
  
   drivers/gpu/drm/i915/i915_drv.h |   1 +
   drivers/gpu/drm/i915/intel_ringbuffer.c |   2 +
   drivers/gpu/drm/i915/intel_ringbuffer.h |  11 ++-
   4 files changed, 116 insertions(+), 34 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
  b/drivers/gpu/drm/i915/i915_cmd_parser.c
  index 9bac097..9dca899 100644
  --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
  +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
  @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 
  cmd_header)
  return 0;
   }
   
  -static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
  +static bool validate_cmds_sorted(struct intel_ring_buffer *ring,
  +const struct drm_i915_cmd_table *cmd_tables,
  +int cmd_table_count)
   {
  int i;
  bool ret = true;
   
  -   if (!ring-cmd_tables || ring-cmd_table_count == 0)
  +   if (!cmd_tables || cmd_table_count == 0)
  return true;
   
  -   for (i = 0; i  ring-cmd_table_count; i++) {
  -   const struct drm_i915_cmd_table *table = ring-cmd_tables[i];
  +   for (i = 0; i  cmd_table_count; i++) {
  +   const struct drm_i915_cmd_table *table = cmd_tables[i];
  u32 previous = 0;
  int j;
   
  @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct 
  intel_ring_buffer *ring)
   ring-master_reg_count);
   }
   
  +struct cmd_node {
  +   const struct drm_i915_cmd_descriptor *desc;
  +   struct hlist_node node;
  +};
  +
  +/*
  + * Different command ranges have different numbers of bits for the opcode.
  + * In order to use the opcode bits, and only the opcode bits, for the hash 
  key
  + * we should use the MI_* command opcode mask (since those commands use the
  + * fewest bits for the opcode.)
  + */
  +#define CMD_HASH_MASK STD_MI_OPCODE_MASK
  +
  +static int init_hash_table(struct intel_ring_buffer *ring,
  +  const struct drm_i915_cmd_table *cmd_tables,
  +  int cmd_table_count)
  +{
  +   int i, j;
  +
  +   hash_init(ring-cmd_hash);
  +
  +   for (i = 0; i  cmd_table_count; i++) {
  +   const struct drm_i915_cmd_table *table = cmd_tables[i];
  +
  +   for (j = 0; j  table-count; j++) {
  +   const struct drm_i915_cmd_descriptor *desc =
  +   table-table[j];
  +   struct cmd_node *desc_node =
  +   kmalloc(sizeof(*desc_node), GFP_KERNEL);
  +
  +   if (!desc_node)
  +   return -ENOMEM;
  +
  +   desc_node-desc = desc;
  +   hash_add(ring-cmd_hash, desc_node-node,
  +desc-cmd.value  CMD_HASH_MASK);
  +   }
  +   }
  +
  +   return 0;
  +}
  +
  +static void fini_hash_table(struct intel_ring_buffer *ring)
  +{
  +   struct hlist_node *tmp;
  +   struct cmd_node *desc_node;
  +   int i;
  +
  +   hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) {
  +   hash_del(desc_node-node);
  +   

Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser

2014-04-28 Thread Volkin, Bradley D
On Mon, Apr 28, 2014 at 08:53:30AM -0700, Daniel Vetter wrote:
 On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote:
  From: Brad Volkin bradley.d.vol...@intel.com
  
  For clients that submit large batch buffers the command parser has
  a substantial impact on performance. On my HSW ULT system performance
  drops as much as ~20% on some tests. Most of the time is spent in the
  command lookup code. Converting that from the current naive search to
  a hash table lookup reduces the performance impact by as much as ~10%.
  
  The choice of value for I915_CMD_HASH_ORDER allows all commands
  currently used in the parser tables to hash to their own bucket (except
  for one collision on the render ring). The tradeoff is that it wastes
  memory. Because the opcodes for the commands in the tables are not
  particularly well distributed, reducing the order still leaves many
  buckets empty. The increased collisions don't seem to have a huge
  impact on the performance gain, but for now anyhow, the parser trades
  memory for performance.
 
 For the collisions have you looked into pre-munging the key a bit so that
 we use more bits? A few shifts and xors shouldn't affect perf much really.

I looked at this briefly but didn't find a substantial improvement. The
basic patch improved things enough that I wanted to just get it out. I
can look into this more, but I'd like to think about implementing the
batch buffer copy portion next. I don't want to optimize this, make people
happy, and then introduce another perf drop from the copy. Better to just
take the full hit now and then continue the improvements. Sound reasonable?

Brad

 
 Also since the tables are mostly empty we could just overflow to the next
 hashtable entry, but unfortunately that would require a bit of custom
 insert and lookup code.
 
 Finally if we manage to get 0 collisions a WARN_ON would be good for
 that, to make sure we don't accidentally regress.
 
 Anyway just a few more ideas.
 -Daniel
 
 Finally if we manage to get 0 collisions a WARN_ON would be good for
 that, to make sure we don't accidentally regress.
 
 Anyway just a few more ideas.
 -Daniel
 
  
  Signed-off-by: Brad Volkin bradley.d.vol...@intel.com
  ---
   drivers/gpu/drm/i915/i915_cmd_parser.c  | 136 
  
   drivers/gpu/drm/i915/i915_drv.h |   1 +
   drivers/gpu/drm/i915/intel_ringbuffer.c |   2 +
   drivers/gpu/drm/i915/intel_ringbuffer.h |  11 ++-
   4 files changed, 116 insertions(+), 34 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
  b/drivers/gpu/drm/i915/i915_cmd_parser.c
  index 9bac097..9dca899 100644
  --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
  +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
  @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 
  cmd_header)
  return 0;
   }
   
  -static bool validate_cmds_sorted(struct intel_ring_buffer *ring)
  +static bool validate_cmds_sorted(struct intel_ring_buffer *ring,
  +const struct drm_i915_cmd_table *cmd_tables,
  +int cmd_table_count)
   {
  int i;
  bool ret = true;
   
  -   if (!ring-cmd_tables || ring-cmd_table_count == 0)
  +   if (!cmd_tables || cmd_table_count == 0)
  return true;
   
  -   for (i = 0; i  ring-cmd_table_count; i++) {
  -   const struct drm_i915_cmd_table *table = ring-cmd_tables[i];
  +   for (i = 0; i  cmd_table_count; i++) {
  +   const struct drm_i915_cmd_table *table = cmd_tables[i];
  u32 previous = 0;
  int j;
   
  @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct 
  intel_ring_buffer *ring)
   ring-master_reg_count);
   }
   
  +struct cmd_node {
  +   const struct drm_i915_cmd_descriptor *desc;
  +   struct hlist_node node;
  +};
  +
  +/*
  + * Different command ranges have different numbers of bits for the opcode.
  + * In order to use the opcode bits, and only the opcode bits, for the hash 
  key
  + * we should use the MI_* command opcode mask (since those commands use the
  + * fewest bits for the opcode.)
  + */
  +#define CMD_HASH_MASK STD_MI_OPCODE_MASK
  +
  +static int init_hash_table(struct intel_ring_buffer *ring,
  +  const struct drm_i915_cmd_table *cmd_tables,
  +  int cmd_table_count)
  +{
  +   int i, j;
  +
  +   hash_init(ring-cmd_hash);
  +
  +   for (i = 0; i  cmd_table_count; i++) {
  +   const struct drm_i915_cmd_table *table = cmd_tables[i];
  +
  +   for (j = 0; j  table-count; j++) {
  +   const struct drm_i915_cmd_descriptor *desc =
  +   table-table[j];
  +   struct cmd_node *desc_node =
  +   kmalloc(sizeof(*desc_node), GFP_KERNEL);
  +
  +   if (!desc_node)
  +   return -ENOMEM;
  +
  +   desc_node-desc = desc;
  + 

Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 6:07 PM, Volkin, Bradley D
bradley.d.vol...@intel.com wrote:
 On Mon, Apr 28, 2014 at 08:53:30AM -0700, Daniel Vetter wrote:
 On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote:
  From: Brad Volkin bradley.d.vol...@intel.com
 
  For clients that submit large batch buffers the command parser has
  a substantial impact on performance. On my HSW ULT system performance
  drops as much as ~20% on some tests. Most of the time is spent in the
  command lookup code. Converting that from the current naive search to
  a hash table lookup reduces the performance impact by as much as ~10%.
 
  The choice of value for I915_CMD_HASH_ORDER allows all commands
  currently used in the parser tables to hash to their own bucket (except
  for one collision on the render ring). The tradeoff is that it wastes
  memory. Because the opcodes for the commands in the tables are not
  particularly well distributed, reducing the order still leaves many
  buckets empty. The increased collisions don't seem to have a huge
  impact on the performance gain, but for now anyhow, the parser trades
  memory for performance.

 For the collisions have you looked into pre-munging the key a bit so that
 we use more bits? A few shifts and xors shouldn't affect perf much really.

 I looked at this briefly but didn't find a substantial improvement. The
 basic patch improved things enough that I wanted to just get it out. I
 can look into this more, but I'd like to think about implementing the
 batch buffer copy portion next. I don't want to optimize this, make people
 happy, and then introduce another perf drop from the copy. Better to just
 take the full hit now and then continue the improvements. Sound reasonable?

Yeah, makes sense. Like I've said just throwing around ideas.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 11/14] drm/i915/bdw: Add WT caching ability

2014-04-28 Thread Volkin, Bradley D
On Fri, Apr 18, 2014 at 02:04:27PM -0700, Rodrigo Vivi wrote:
 From: Ben Widawsky benjamin.widaw...@intel.com
 
 I don't have any insight on what parts can do what. The docs do seem to
 suggest WT caching works in at least the same manner as it doesn't on
 Haswell.

As Ben previously mentioned, s/doesn't/does. Other than that, looks good

Reviewed-by: Brad Volkin bradley.d.vol...@intel.com

 
 The addr = 0  is to shut up GCC:
 drivers/gpu/drm/i915/i915_gem_gtt.c:80:7: warning: 'addr' may be used
 uninitialized in this function [-Wmaybe-uninitialized]
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 Signed-off-by: Rodrigo Vivi rodrigo.v...@gmail.com
 ---
  drivers/gpu/drm/i915/i915_drv.h | 11 ++-
  drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +
  2 files changed, 19 insertions(+), 9 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 4e81ce1..2bc6745 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -1835,11 +1835,12 @@ struct drm_i915_cmd_table {
  #define BSD_RING (1VCS)
  #define BLT_RING (1BCS)
  #define VEBOX_RING   (1VECS)
 -#define HAS_BSD(dev)(INTEL_INFO(dev)-ring_mask  BSD_RING)
 -#define HAS_BLT(dev)(INTEL_INFO(dev)-ring_mask  BLT_RING)
 -#define HAS_VEBOX(dev)(INTEL_INFO(dev)-ring_mask  VEBOX_RING)
 -#define HAS_LLC(dev)(INTEL_INFO(dev)-has_llc)
 -#define HAS_WT(dev)(IS_HASWELL(dev)  to_i915(dev)-ellc_size)
 +#define HAS_BSD(dev) (INTEL_INFO(dev)-ring_mask  BSD_RING)
 +#define HAS_BLT(dev) (INTEL_INFO(dev)-ring_mask  BLT_RING)
 +#define HAS_VEBOX(dev)   (INTEL_INFO(dev)-ring_mask  
 VEBOX_RING)
 +#define HAS_LLC(dev) (INTEL_INFO(dev)-has_llc)
 +#define HAS_WT(dev)  ((IS_HASWELL(dev) || IS_BROADWELL(dev))  \
 +  to_i915(dev)-ellc_size)
  #define I915_NEED_GFX_HWS(dev)   (INTEL_INFO(dev)-need_gfx_hws)
  
  #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)-gen = 6)
 diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
 b/drivers/gpu/drm/i915/i915_gem_gtt.c
 index 0d514ff..4969162 100644
 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
 +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
 @@ -68,10 +68,19 @@ static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t 
 addr,
  {
   gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
   pte |= addr;
 - if (level != I915_CACHE_NONE)
 - pte |= PPAT_CACHED_INDEX;
 - else
 +
 + switch (level) {
 + case I915_CACHE_NONE:
   pte |= PPAT_UNCACHED_INDEX;
 + break;
 + case I915_CACHE_WT:
 + pte |= PPAT_DISPLAY_ELLC_INDEX;
 + break;
 + default:
 + pte |= PPAT_CACHED_INDEX;
 + break;
 + }
 +
   return pte;
  }
  
 @@ -1368,7 +1377,7 @@ static void gen8_ggtt_insert_entries(struct 
 i915_address_space *vm,
   (gen8_gtt_pte_t __iomem *)dev_priv-gtt.gsm + first_entry;
   int i = 0;
   struct sg_page_iter sg_iter;
 - dma_addr_t addr;
 + dma_addr_t addr = 0;
  
   for_each_sg_page(st-sgl, sg_iter, st-nents, 0) {
   addr = sg_dma_address(sg_iter.sg) +
 -- 
 1.8.3.1
 
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 12/14] drm/i915/bdw: enable eDRAM.

2014-04-28 Thread Volkin, Bradley D
Reviewed-by: Brad Volkin bradley.d.vol...@intel.com

On Fri, Apr 18, 2014 at 02:04:28PM -0700, Rodrigo Vivi wrote:
 From: Ben Widawsky benjamin.widaw...@intel.com
 
 The same register exists for querying and programming eDRAM AKA eLLC. So
 we can simply use it. For now, use all the same defaults as we had
 for Haswell, since like Haswell, I have no further details.
 
 I do not actually have a part with eDRAM, so I cannot test this.
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 Signed-off-by: Rodrigo Vivi rodrigo.v...@gmail.com
 ---
  drivers/gpu/drm/i915/intel_uncore.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
 b/drivers/gpu/drm/i915/intel_uncore.c
 index c8969e3..0e6b502 100644
 --- a/drivers/gpu/drm/i915/intel_uncore.c
 +++ b/drivers/gpu/drm/i915/intel_uncore.c
 @@ -373,7 +373,7 @@ void intel_uncore_early_sanitize(struct drm_device *dev)
   if (HAS_FPGA_DBG_UNCLAIMED(dev))
   __raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
  
 - if (IS_HASWELL(dev) 
 + if ((IS_HASWELL(dev) || IS_BROADWELL(dev)) 
   (__raw_i915_read32(dev_priv, HSW_EDRAM_PRESENT) == 1)) {
   /* The docs do not explain exactly how the calculation can be
* made. It is somewhat guessable, but for now, it's always
 -- 
 1.8.3.1
 
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 13/14] drm/i915/bdw: Disable idle DOP clock gating

2014-04-28 Thread Volkin, Bradley D
Reviewed-by: Brad Volkin bradley.d.vol...@intel.com

On Fri, Apr 18, 2014 at 02:04:29PM -0700, Rodrigo Vivi wrote:
 From: Ben Widawsky benjamin.widaw...@intel.com
 
 It seems we need this at least for the current platforms we have, but
 probably not later. In any event, it shouldn't cause too much harm as we do
 the same thing on several other platforms.
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 Signed-off-by: Rodrigo Vivi rodrigo.v...@gmail.com
 ---
  drivers/gpu/drm/i915/intel_pm.c | 4 
  1 file changed, 4 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
 index a66000c..8d40786 100644
 --- a/drivers/gpu/drm/i915/intel_pm.c
 +++ b/drivers/gpu/drm/i915/intel_pm.c
 @@ -4924,6 +4924,10 @@ static void gen8_init_clock_gating(struct drm_device 
 *dev)
   I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
  _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
  
 + /* WaDisableDopClockGating:bdw May not be needed for production */
 + I915_WRITE(GEN7_ROW_CHICKEN2,
 +_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
 +
   /* WaSwitchSolVfFArbitrationPriority:bdw */
   I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
  
 -- 
 1.8.3.1
 
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] mm: Throttle shrinkers harder

2014-04-28 Thread Dave Hansen
On 04/26/2014 06:10 AM, Chris Wilson wrote:
   Thanks for the pointer to
   register_oom_notifier(), I can use that to make sure that we do purge
   everything from the GPU, and do a sanity check at the same time, before
   we start killing processes.
  
  Actually, that one doesn't get called until we're *SURE* we are going to
  OOM.  Any action taken in there won't be taken into account.
 blocking_notifier_call_chain(oom_notify_list, 0, freed);
 if (freed  0)
   /* Got some memory back in the last second. */
   return;
 
 That looks like it should abort the oom and so repeat the allocation
 attempt? Or is that too hopeful?

You're correct.  I was reading the code utterly wrong.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled

2014-04-28 Thread Paulo Zanoni
2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch:
 On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote:
 The PC8 state won't be entered unless runtime PM is enabled, so support
 for PC8 residency counters alone is not enough to run this test.

This is true only for the very latest kernels. We have Kernels with
PC8 support and without runtime PM support. Do you actually need this
specific patch to solve any problems you're currently having? If not,
maybe we could revert it so people with stable Kernels will be able to
run IGT.

Thanks,
Paulo


 Signed-off-by: Imre Deak imre.d...@intel.com

 Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch

 ---
  tests/pm_pc8.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c
 index 010af44..9a95326 100644
 --- a/tests/pm_pc8.c
 +++ b/tests/pm_pc8.c
 @@ -769,7 +769,7 @@ static void setup_environment(void)
   printf(Runtime PM support: %d\n, has_runtime_pm);
   printf(PC8 residency support: %d\n, has_pc8);

 - igt_require(has_runtime_pm || has_pc8);
 + igt_require(has_runtime_pm);
  }

  static void teardown_environment(void)
 --
 1.8.4

 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx

 --
 Daniel Vetter
 Software Engineer, Intel Corporation
 +41 (0) 79 365 57 48 - http://blog.ffwll.ch
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx



-- 
Paulo Zanoni
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] Revert drm/i915: fix infinite loop at gen6_update_ring_freq

2014-04-28 Thread Paulo Zanoni
2014-04-23 4:05 GMT-03:00 Daniel Vetter dan...@ffwll.ch:
 On Tue, Apr 22, 2014 at 06:25:12PM -0300, Paulo Zanoni wrote:
 2014-04-11 6:02 GMT-03:00 Daniel Vetter dan...@ffwll.ch:
  On Thu, Apr 10, 2014 at 10:52:26AM -0700, Ben Widawsky wrote:
  On Thu, Apr 10, 2014 at 10:50:43AM -0700, Ben Widawsky wrote:
   On Thu, Apr 10, 2014 at 09:04:47AM +0200, Daniel Vetter wrote:
This reverts commit 4b28a1f3ef55a3b0b68dbab1fe6dbaf18e186710.
   
This patch duct-tapes over some issue in the current bdw rps patches
which must wait with enabling rc6/rps until the very first batch has
been submitted by userspace.
   
But those patches aren't merged yet, and for upstream we need to have
an in-kernel emission of the very first batch. I shouldn't have
merged this patch so let's revert it again.
  
   I said this on the mailing list before you merged the patch.
 
  20140402050338.gb13...@bwidawsk.net
 
  20140402145813.GV7225@phenom.ffwll.local will explain things.

 There's now a regression report pointing to the revert:
 https://bugs.freedesktop.org/show_bug.cgi?id=77565 .

 What is the proposed solution now? Just WARN and still avoid the
 infinite loop? Or keep the infinite loop and leave the bug open?
 Disable BDW runtime PM?

 I've thought that we can only hit this with the as-yet unmerged rc6
 patches on bdw, so I'm really confused why this blows up now?

 In any case I've thought Imre has stumbled over a similar issue on byt and
 he has a fix to prevent runtime pm until the delayed rps init has run.
 I've assigned the bug to him.

 Still confused why this suddenly blew up ...

Sorry for the delayed response.

The bug is very simple: since we did not enable RC6, by the time we
run gen6_update_ring_freq(), the RPS limits will all be zero. The loop
decrements a variable until it reaches a point where it is smaller
than the other. But since the other variable is zero, the loop won't
end since we can't be smaller than zero in the unsigned world, no
matter how much we decrement it.

This can probably be reproduced on non-BDW machines too, with RC6 disabled.

 -Daniel
 --
 Daniel Vetter
 Software Engineer, Intel Corporation
 +41 (0) 79 365 57 48 - http://blog.ffwll.ch



-- 
Paulo Zanoni
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled

2014-04-28 Thread Imre Deak
On Mon, 2014-04-28 at 14:57 -0300, Paulo Zanoni wrote:
 2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch:
  On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote:
  The PC8 state won't be entered unless runtime PM is enabled, so support
  for PC8 residency counters alone is not enough to run this test.
 
 This is true only for the very latest kernels. We have Kernels with
 PC8 support and without runtime PM support. Do you actually need this
 specific patch to solve any problems you're currently having? If not,
 maybe we could revert it so people with stable Kernels will be able to
 run IGT.

The following one needs this at least on BDW/current kernel:
https://bugs.freedesktop.org/show_bug.cgi?id=77565

I might be wrong, but I thought we don't need backward compatibility in
igt. We could also make the check kernel version dependent, I'm not sure
if it's worth the effort.

--Imre

 
 Thanks,
 Paulo
 
 
  Signed-off-by: Imre Deak imre.d...@intel.com
 
  Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch
 
  ---
   tests/pm_pc8.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 
  diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c
  index 010af44..9a95326 100644
  --- a/tests/pm_pc8.c
  +++ b/tests/pm_pc8.c
  @@ -769,7 +769,7 @@ static void setup_environment(void)
printf(Runtime PM support: %d\n, has_runtime_pm);
printf(PC8 residency support: %d\n, has_pc8);
 
  - igt_require(has_runtime_pm || has_pc8);
  + igt_require(has_runtime_pm);
   }
 
   static void teardown_environment(void)
  --
  1.8.4
 
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 
  --
  Daniel Vetter
  Software Engineer, Intel Corporation
  +41 (0) 79 365 57 48 - http://blog.ffwll.ch
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 
 
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled

2014-04-28 Thread Paulo Zanoni
2014-04-28 15:22 GMT-03:00 Imre Deak imre.d...@intel.com:
 On Mon, 2014-04-28 at 14:57 -0300, Paulo Zanoni wrote:
 2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch:
  On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote:
  The PC8 state won't be entered unless runtime PM is enabled, so support
  for PC8 residency counters alone is not enough to run this test.

 This is true only for the very latest kernels. We have Kernels with
 PC8 support and without runtime PM support. Do you actually need this
 specific patch to solve any problems you're currently having? If not,
 maybe we could revert it so people with stable Kernels will be able to
 run IGT.

 The following one needs this at least on BDW/current kernel:
 https://bugs.freedesktop.org/show_bug.cgi?id=77565

This is not a fix to the bug. By skipping the whole test suite, this
patch is just hiding the fact that PC8 doesn't work on BDW. Yes, PC8
on BDW (specifically) won't work without runtime PM, but that's not
true for HSW on some Kernels. The proper fix to the bug above is to
fix RC6 on BDW, or revert the revert.


 I might be wrong, but I thought we don't need backward compatibility in
 igt. We could also make the check kernel version dependent, I'm not sure
 if it's worth the effort.

 --Imre


 Thanks,
 Paulo

 
  Signed-off-by: Imre Deak imre.d...@intel.com
 
  Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch
 
  ---
   tests/pm_pc8.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 
  diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c
  index 010af44..9a95326 100644
  --- a/tests/pm_pc8.c
  +++ b/tests/pm_pc8.c
  @@ -769,7 +769,7 @@ static void setup_environment(void)
printf(Runtime PM support: %d\n, has_runtime_pm);
printf(PC8 residency support: %d\n, has_pc8);
 
  - igt_require(has_runtime_pm || has_pc8);
  + igt_require(has_runtime_pm);
   }
 
   static void teardown_environment(void)
  --
  1.8.4
 
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 
  --
  Daniel Vetter
  Software Engineer, Intel Corporation
  +41 (0) 79 365 57 48 - http://blog.ffwll.ch
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx








-- 
Paulo Zanoni
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled

2014-04-28 Thread Imre Deak
On Mon, 2014-04-28 at 15:35 -0300, Paulo Zanoni wrote:
 2014-04-28 15:22 GMT-03:00 Imre Deak imre.d...@intel.com:
  On Mon, 2014-04-28 at 14:57 -0300, Paulo Zanoni wrote:
  2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch:
   On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote:
   The PC8 state won't be entered unless runtime PM is enabled, so support
   for PC8 residency counters alone is not enough to run this test.
 
  This is true only for the very latest kernels. We have Kernels with
  PC8 support and without runtime PM support. Do you actually need this
  specific patch to solve any problems you're currently having? If not,
  maybe we could revert it so people with stable Kernels will be able to
  run IGT.
 
  The following one needs this at least on BDW/current kernel:
  https://bugs.freedesktop.org/show_bug.cgi?id=77565
 
 This is not a fix to the bug. By skipping the whole test suite, this
 patch is just hiding the fact that PC8 doesn't work on BDW. Yes, PC8
 on BDW (specifically) won't work without runtime PM, but that's not
 true for HSW on some Kernels. The proper fix to the bug above is to
 fix RC6 on BDW, or revert the revert.

The igt patch is not supposed to fix the issue, but it is correct for
new kernels on all platforms. In case runtime PM is disabled - for
example because it didn't get enabled in the first place or because RC6
is force disabled through a module option - the test should be skipped.

--Imre

 
 
  I might be wrong, but I thought we don't need backward compatibility in
  igt. We could also make the check kernel version dependent, I'm not sure
  if it's worth the effort.
 
  --Imre
 
 
  Thanks,
  Paulo
 
  
   Signed-off-by: Imre Deak imre.d...@intel.com
  
   Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch
  
   ---
tests/pm_pc8.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
  
   diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c
   index 010af44..9a95326 100644
   --- a/tests/pm_pc8.c
   +++ b/tests/pm_pc8.c
   @@ -769,7 +769,7 @@ static void setup_environment(void)
 printf(Runtime PM support: %d\n, has_runtime_pm);
 printf(PC8 residency support: %d\n, has_pc8);
  
   - igt_require(has_runtime_pm || has_pc8);
   + igt_require(has_runtime_pm);
}
  
static void teardown_environment(void)
   --
   1.8.4
  
   ___
   Intel-gfx mailing list
   Intel-gfx@lists.freedesktop.org
   http://lists.freedesktop.org/mailman/listinfo/intel-gfx
  
   --
   Daniel Vetter
   Software Engineer, Intel Corporation
   +41 (0) 79 365 57 48 - http://blog.ffwll.ch
   ___
   Intel-gfx mailing list
   Intel-gfx@lists.freedesktop.org
   http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 
 
 
 
 
 
 
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled

2014-04-28 Thread Imre Deak
On Mon, 2014-04-28 at 15:35 -0300, Paulo Zanoni wrote:
 2014-04-28 15:22 GMT-03:00 Imre Deak imre.d...@intel.com:
  On Mon, 2014-04-28 at 14:57 -0300, Paulo Zanoni wrote:
  2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch:
   On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote:
   The PC8 state won't be entered unless runtime PM is enabled, so support
   for PC8 residency counters alone is not enough to run this test.
 
  This is true only for the very latest kernels. We have Kernels with
  PC8 support and without runtime PM support. Do you actually need this
  specific patch to solve any problems you're currently having? If not,
  maybe we could revert it so people with stable Kernels will be able to
  run IGT.
 
  The following one needs this at least on BDW/current kernel:
  https://bugs.freedesktop.org/show_bug.cgi?id=77565
 
 This is not a fix to the bug. By skipping the whole test suite, this
 patch is just hiding the fact that PC8 doesn't work on BDW. Yes, PC8
 on BDW (specifically) won't work without runtime PM, but that's not
 true for HSW on some Kernels. The proper fix to the bug above is to
 fix RC6 on BDW, or revert the revert.

Note that recently we made RC6 a requirement for runtime PM, so fixing
it is the only option for re-enabling runtime PM. 

  I might be wrong, but I thought we don't need backward compatibility in
  igt. We could also make the check kernel version dependent, I'm not sure
  if it's worth the effort.
 
  --Imre
 
 
  Thanks,
  Paulo
 
  
   Signed-off-by: Imre Deak imre.d...@intel.com
  
   Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch
  
   ---
tests/pm_pc8.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
  
   diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c
   index 010af44..9a95326 100644
   --- a/tests/pm_pc8.c
   +++ b/tests/pm_pc8.c
   @@ -769,7 +769,7 @@ static void setup_environment(void)
 printf(Runtime PM support: %d\n, has_runtime_pm);
 printf(PC8 residency support: %d\n, has_pc8);
  
   - igt_require(has_runtime_pm || has_pc8);
   + igt_require(has_runtime_pm);
}
  
static void teardown_environment(void)
   --
   1.8.4
  
   ___
   Intel-gfx mailing list
   Intel-gfx@lists.freedesktop.org
   http://lists.freedesktop.org/mailman/listinfo/intel-gfx
  
   --
   Daniel Vetter
   Software Engineer, Intel Corporation
   +41 (0) 79 365 57 48 - http://blog.ffwll.ch
   ___
   Intel-gfx mailing list
   Intel-gfx@lists.freedesktop.org
   http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 
 
 
 
 
 
 
 


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] Revert drm/i915: fix infinite loop at gen6_update_ring_freq

2014-04-28 Thread Daniel Vetter
On Mon, Apr 28, 2014 at 8:14 PM, Paulo Zanoni przan...@gmail.com wrote:
 This can probably be reproduced on non-BDW machines too, with RC6 disabled.

If I understand Imre's patch correctly the bug is that we didn't have
rc6 on bdw, but the sanitize function didn't make this clear leading
to bugs. If my understanding is wrong then I need to drop Imre's patch
again.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] Revert drm/i915: fix infinite loop at gen6_update_ring_freq

2014-04-28 Thread Imre Deak
On Mon, 2014-04-28 at 21:23 +0200, Daniel Vetter wrote:
 On Mon, Apr 28, 2014 at 8:14 PM, Paulo Zanoni przan...@gmail.com wrote:
  This can probably be reproduced on non-BDW machines too, with RC6 disabled.
 
 If I understand Imre's patch correctly the bug is that we didn't have
 rc6 on bdw, but the sanitize function didn't make this clear leading
 to bugs. 

Yes, that's correct. For runtime PM we require RC6 to be enabled, and we
use intel_enable_rc6() to check for this. Before patch [1]
intel_enable_rc6() reported incorrectly on BDW that RC6 is enabled.

--Imre

[1]
http://lists.freedesktop.org/archives/intel-gfx/2014-April/044354.html

If my understanding is wrong then I need to drop Imre's patch again.

 -Daniel


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] drm/i915: Debugfs disable RPS boost and idle

2014-04-28 Thread Daisy Sun
RP frequency request is affected by 2 modules: normal turbo
algorithm and RPS boost algorithm. By adding RPS boost algorithm
to the mix, the final frequency becomes relatively unpredictable.
Add a switch to enable/disable RPS boost functionality. When
disabled, RP frequency will follow the normal turbo algorithm only.

Intention: when boost and idle are disabled, we have a clear view
of the turbo algorithm. It's very helpful for verifying that the turbo
algorithm is working as expected.
Without debugfs hooks, RPS boost or idle may kick in at
any time and under any circumstances.

V1->V2: Follow Daniel's comment to explain the intention.

Signed-off-by: Daisy Sun daisy@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 40 +
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/intel_pm.c |  8 ++--
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 1e83ae4..ff71214 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3486,6 +3486,45 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
i915_drop_caches_get, i915_drop_caches_set,
0x%08llx\n);
 
+static int i915_rps_disable_boost_get(void *data, u64 *val)
+{
+   struct drm_device *dev = data;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+
+   if (INTEL_INFO(dev)-gen  6)
+   return -ENODEV;
+
+   *val = dev_priv-rps.debugfs_disable_boost;
+
+   return 0;
+}
+
+static int i915_rps_disable_boost_set(void *data, u64 val)
+{
+   struct drm_device *dev = data;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   int ret;
+
+   flush_delayed_work(dev_priv-rps.delayed_resume_work);
+
+   DRM_DEBUG_DRIVER(Setting RPS disable Boost-Idle mode to %s\n,
+val ? on : off);
+
+   ret = mutex_lock_interruptible(dev_priv-rps.hw_lock);
+   if (ret)
+   return ret;
+
+   dev_priv-rps.debugfs_disable_boost = val;
+
+   mutex_unlock(dev_priv-rps.hw_lock);
+
+   return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_rps_disable_boost_fops,
+   i915_rps_disable_boost_get, i915_rps_disable_boost_set,
+   %llu\n);
+
 static int
 i915_max_freq_get(void *data, u64 *val)
 {
@@ -3821,6 +3860,7 @@ static const struct i915_debugfs_files {
{i915_wedged, i915_wedged_fops},
{i915_max_freq, i915_max_freq_fops},
{i915_min_freq, i915_min_freq_fops},
+   {i915_rps_disable_boost, i915_rps_disable_boost_fops},
{i915_cache_sharing, i915_cache_sharing_fops},
{i915_ring_stop, i915_ring_stop_fops},
{i915_ring_missed_irq, i915_ring_missed_irq_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 272aa7a..9c427da 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -847,6 +847,7 @@ struct intel_gen6_power_mgmt {
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
+   bool debugfs_disable_boost;
bool enabled;
struct delayed_work delayed_resume_work;
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 75c1c76..6acac14 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3163,7 +3163,9 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
struct drm_device *dev = dev_priv-dev;
 
mutex_lock(dev_priv-rps.hw_lock);
-   if (dev_priv-rps.enabled) {
+
+   if (dev_priv-rps.enabled
+!dev_priv-rps.debugfs_disable_boost) {
if (IS_VALLEYVIEW(dev))
vlv_set_rps_idle(dev_priv);
else
@@ -3178,7 +3180,9 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
struct drm_device *dev = dev_priv-dev;
 
mutex_lock(dev_priv-rps.hw_lock);
-   if (dev_priv-rps.enabled) {
+
+   if (dev_priv-rps.enabled
+!dev_priv-rps.debugfs_disable_boost) {
if (IS_VALLEYVIEW(dev))
valleyview_set_rps(dev_priv-dev, 
dev_priv-rps.max_freq_softlimit);
else
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 05.1/24] drm/i915: Make sure computed watermarks never overflow the registers

2014-04-28 Thread Paulo Zanoni
2014-04-28 9:44 GMT-03:00  ville.syrj...@linux.intel.com:
 From: Ville Syrjälä ville.syrj...@linux.intel.com

 When we calculate the watermarks for a pipe make sure we leave any
 level fully zeroed out if it would exceed any of the maximum values
 that fit in the registers.

 This will be important later when we start to use also disabled
 watermark levels during LP1+ merging.

Thanks for splitting the patch! It's much easier to review now :)


 Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
 ---
  drivers/gpu/drm/i915/intel_pm.c | 43 
 ++---
  1 file changed, 36 insertions(+), 7 deletions(-)

 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
 index f061ef1..c722acb 100644
 --- a/drivers/gpu/drm/i915/intel_pm.c
 +++ b/drivers/gpu/drm/i915/intel_pm.c
 @@ -1921,6 +1921,16 @@ static void ilk_compute_wm_maximums(const struct 
 drm_device *dev,
 max-fbc = ilk_fbc_wm_reg_max(dev);
  }

 +static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
 +   int level,
 +   struct ilk_wm_maximums *max)
 +{
 +   max-pri = ilk_plane_wm_reg_max(dev, level, false);
 +   max-spr = ilk_plane_wm_reg_max(dev, level, true);
 +   max-cur = ilk_cursor_wm_reg_max(dev, level);
 +   max-fbc = ilk_fbc_wm_reg_max(dev);
 +}
 +
  static bool ilk_validate_wm_level(int level,
   const struct ilk_wm_maximums *max,
   struct intel_wm_level *result)
 @@ -2178,9 +2188,6 @@ static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
 };
 struct ilk_wm_maximums max;

 -   /* LP0 watermarks always use 1/2 DDB partitioning */
 -   ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, max);
 -
 pipe_wm-pipe_enabled = params-active;
 pipe_wm-sprites_enabled = params-spr.enabled;
 pipe_wm-sprites_scaled = params-spr.scaled;
 @@ -2193,15 +2200,37 @@ static bool intel_compute_pipe_wm(struct drm_crtc 
 *crtc,
 if (params-spr.scaled)
 max_level = 0;

 -   for (level = 0; level = max_level; level++)
 -   ilk_compute_wm_level(dev_priv, level, params,
 -pipe_wm-wm[level]);
 +   ilk_compute_wm_level(dev_priv, 0, params, pipe_wm-wm[0]);

 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 pipe_wm-linetime = hsw_compute_linetime_wm(dev, crtc);

 +   /* LP0 watermarks always use 1/2 DDB partitioning */
 +   ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, max);
 +
 /* At least LP0 must be valid */
 -   return ilk_validate_wm_level(0, max, pipe_wm-wm[0]);
 +   if (!ilk_validate_wm_level(0, max, pipe_wm-wm[0]))
 +   return false;

The only caller of this function does not really check its return
value. OTOH, fixing this is outside of the scope of your patch, I'm
just mentioning in case you have some watermarks TODO list :)

Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com

 +
 +   ilk_compute_wm_reg_maximums(dev, 1, max);
 +
 +   for (level = 1; level = max_level; level++) {
 +   struct intel_wm_level wm = {};
 +
 +   ilk_compute_wm_level(dev_priv, level, params, wm);
 +
 +   /*
 +* Disable any watermark level that exceeds the
 +* register maximums since such watermarks are
 +* always invalid.
 +*/
 +   if (!ilk_validate_wm_level(level, max, wm))
 +   break;
 +
 +   pipe_wm-wm[level] = wm;
 +   }
 +
 +   return true;
  }

  /*
 --
 1.8.3.2




-- 
Paulo Zanoni
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 05.2/24] drm/i915: Merge LP1+ watermarks in safer way

2014-04-28 Thread Paulo Zanoni
2014-04-28 9:44 GMT-03:00  ville.syrj...@linux.intel.com:
 From: Ville Syrjälä ville.syrj...@linux.intel.com

 On ILK when we disable a particular watermark level, we must
 maintain the actual watermark values for that level for some time
 (until the next vblank possibly). Otherwise we risk underruns.

 In order to achieve that result we must merge the LP1+ watermarks a
 bit differently since we must also merge levels that are to be
 disabled. We must also make sure we don't overflow the fields in the
 watermark registers in case the calculated watermarks come out too
 big to fit.

 As early as possible we mark all computed watermark levels as
 disabled if they would exceed the register maximums. We make sure
 to leave the actual watermarks for such levels zeroed out. The during

_Then_ during merging, I guess.

Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com

 merging, we take the maximum values for every level, regardless if
 they're disabled or not. That may seem a bit pointless since at the
 moment all the watermark levels we merge should have their values
 zeroed if the level is already disabled. However soon we will be
 dealing with intermediate watermarks that, in addition to the new
 watermark values, also contain the previous watermark values, and so
 levels that are disabled may no longer be zeroed out.

 v2: Split the patch in two (Paulo)
 Use if() instead of  when merging -enable (Paulo)

 Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
 ---
  drivers/gpu/drm/i915/intel_pm.c | 37 -
  1 file changed, 28 insertions(+), 9 deletions(-)

 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
 index c722acb..b89fc33 100644
 --- a/drivers/gpu/drm/i915/intel_pm.c
 +++ b/drivers/gpu/drm/i915/intel_pm.c
 @@ -2242,6 +2242,8 @@ static void ilk_merge_wm_level(struct drm_device *dev,
  {
 const struct intel_crtc *intel_crtc;

 +   ret_wm-enable = true;
 +
 list_for_each_entry(intel_crtc, dev-mode_config.crtc_list, 
 base.head) {
 const struct intel_pipe_wm *active = intel_crtc-wm.active;
 const struct intel_wm_level *wm = active-wm[level];
 @@ -2249,16 +2251,19 @@ static void ilk_merge_wm_level(struct drm_device *dev,
 if (!active-pipe_enabled)
 continue;

 +   /*
 +* The watermark values may have been used in the past,
 +* so we must maintain them in the registers for some
 +* time even if the level is now disabled.
 +*/
 if (!wm-enable)
 -   return;
 +   ret_wm-enable = false;

 ret_wm-pri_val = max(ret_wm-pri_val, wm-pri_val);
 ret_wm-spr_val = max(ret_wm-spr_val, wm-spr_val);
 ret_wm-cur_val = max(ret_wm-cur_val, wm-cur_val);
 ret_wm-fbc_val = max(ret_wm-fbc_val, wm-fbc_val);
 }
 -
 -   ret_wm-enable = true;
  }

  /*
 @@ -2270,6 +2275,7 @@ static void ilk_wm_merge(struct drm_device *dev,
  struct intel_pipe_wm *merged)
  {
 int level, max_level = ilk_wm_max_level(dev);
 +   int last_enabled_level = max_level;

 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
 if ((INTEL_INFO(dev)-gen = 6 || IS_IVYBRIDGE(dev)) 
 @@ -2285,15 +2291,19 @@ static void ilk_wm_merge(struct drm_device *dev,

 ilk_merge_wm_level(dev, level, wm);

 -   if (!ilk_validate_wm_level(level, max, wm))
 -   break;
 +   if (level  last_enabled_level)
 +   wm-enable = false;
 +   else if (!ilk_validate_wm_level(level, max, wm))
 +   /* make sure all following levels get disabled */
 +   last_enabled_level = level - 1;

 /*
  * The spec says it is preferred to disable
  * FBC WMs instead of disabling a WM level.
  */
 if (wm-fbc_val  max-fbc) {
 -   merged-fbc_wm_enabled = false;
 +   if (wm-enable)
 +   merged-fbc_wm_enabled = false;
 wm-fbc_val = 0;
 }
 }
 @@ -2348,14 +2358,19 @@ static void ilk_compute_wm_results(struct drm_device 
 *dev,
 level = ilk_wm_lp_to_level(wm_lp, merged);

 r = merged-wm[level];
 -   if (!r-enable)
 -   break;

 -   results-wm_lp[wm_lp - 1] = WM3_LP_EN |
 +   /*
 +* Maintain the watermark values even if the level is
 +* disabled. Doing otherwise could cause underruns.
 +*/
 +   results-wm_lp[wm_lp - 1] =
 (ilk_wm_lp_latency(dev, level)  
 WM1_LP_LATENCY_SHIFT) |
 

Re: [Intel-gfx] [PATCH v2 07/24] drm/i915: Remove useless checks from primary enable/disable

2014-04-28 Thread Paulo Zanoni
2014-04-28 9:53 GMT-03:00  ville.syrj...@linux.intel.com:
 From: Ville Syrjälä ville.syrj...@linux.intel.com

 We won't be calling intel_enable_primary_plane() or
 intel_disable_primary_plane() with the primary plane in the
 wrong state. So remove the useless DISPLAY_PLANE_ENABLE checks.

 v2: Convert the checks to WARNs instead (Daniel,Paulo)

Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com


 Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
 ---
  drivers/gpu/drm/i915/intel_display.c | 6 ++
  1 file changed, 2 insertions(+), 4 deletions(-)

 diff --git a/drivers/gpu/drm/i915/intel_display.c 
 b/drivers/gpu/drm/i915/intel_display.c
 index 7938556..af9e3fe 100644
 --- a/drivers/gpu/drm/i915/intel_display.c
 +++ b/drivers/gpu/drm/i915/intel_display.c
 @@ -1896,8 +1896,7 @@ static void intel_enable_primary_plane(struct 
 drm_i915_private *dev_priv,

 reg = DSPCNTR(plane);
 val = I915_READ(reg);
 -   if (val  DISPLAY_PLANE_ENABLE)
 -   return;
 +   WARN_ON(val  DISPLAY_PLANE_ENABLE);

 I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE);
 intel_flush_primary_plane(dev_priv, plane);
 @@ -1926,8 +1925,7 @@ static void intel_disable_primary_plane(struct 
 drm_i915_private *dev_priv,

 reg = DSPCNTR(plane);
 val = I915_READ(reg);
 -   if ((val  DISPLAY_PLANE_ENABLE) == 0)
 -   return;
 +   WARN_ON((val  DISPLAY_PLANE_ENABLE) == 0);

 I915_WRITE(reg, val  ~DISPLAY_PLANE_ENABLE);
 intel_flush_primary_plane(dev_priv, plane);
 --
 1.8.3.2




-- 
Paulo Zanoni
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 09/24] drm/i915: Keep vblank interrupts enabled while enabling/disabling planes

2014-04-28 Thread Paulo Zanoni
2014-04-28 9:58 GMT-03:00  ville.syrj...@linux.intel.com:
 From: Ville Syrjälä ville.syrj...@linux.intel.com

 Becasue of the upcoming vblank interrupt driven watermark update

BecaUSe.

Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com

 mechanism we will have use for vblank interrupts during plane
 enabling/disabling. So don't call drm_vblank_off() until planes
 are off, and call drm_vblank_on() just before we start to enable
 the planes.

 v2: Pimp commit message (Paulo)

 Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
 ---
  drivers/gpu/drm/i915/intel_display.c | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

 diff --git a/drivers/gpu/drm/i915/intel_display.c 
 b/drivers/gpu/drm/i915/intel_display.c
 index 88df4ea..8d2a31e 100644
 --- a/drivers/gpu/drm/i915/intel_display.c
 +++ b/drivers/gpu/drm/i915/intel_display.c
 @@ -3547,6 +3547,8 @@ static void ilk_crtc_enable_planes(struct drm_crtc 
 *crtc)
 int pipe = intel_crtc-pipe;
 int plane = intel_crtc-plane;

 +   drm_vblank_on(dev, pipe);
 +
 intel_enable_primary_plane(dev_priv, plane, pipe);
 intel_enable_planes(crtc);
 intel_crtc_update_cursor(crtc, true);
 @@ -3557,8 +3559,6 @@ static void ilk_crtc_enable_planes(struct drm_crtc 
 *crtc)
 mutex_lock(dev-struct_mutex);
 intel_update_fbc(dev);
 mutex_unlock(dev-struct_mutex);
 -
 -   drm_vblank_on(dev, pipe);
  }

  static void ilk_crtc_disable_planes(struct drm_crtc *crtc)
 @@ -3570,7 +3570,6 @@ static void ilk_crtc_disable_planes(struct drm_crtc 
 *crtc)
 int plane = intel_crtc-plane;

 intel_crtc_wait_for_pending_flips(crtc);
 -   drm_vblank_off(dev, pipe);

 if (dev_priv-fbc.plane == plane)
 intel_disable_fbc(dev);
 @@ -3581,6 +3580,8 @@ static void ilk_crtc_disable_planes(struct drm_crtc 
 *crtc)
 intel_disable_planes(crtc);
 intel_disable_primary_plane(dev_priv, plane, pipe);
 intel_wait_for_vblank(dev, pipe);
 +
 +   drm_vblank_off(dev, pipe);
  }

  static void ironlake_crtc_enable(struct drm_crtc *crtc)
 --
 1.8.3.2




-- 
Paulo Zanoni
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 41/71] drm/i915/chv: Add some workaround notes

2014-04-28 Thread Paulo Zanoni
2014-04-28 8:31 GMT-03:00  ville.syrj...@linux.intel.com:
 From: Ville Syrjälä ville.syrj...@linux.intel.com

 We implement the following workarounds:
 * WaDisableAsyncFlipPerfMode:chv
 * WaProgramMiArbOnOffAroundMiSetContext:chv

 v2: Drop WaDisableSemaphoreAndSyncFlipWait note

Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com


 Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
 ---
  drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
  drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

 diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
 b/drivers/gpu/drm/i915/i915_gem_context.c
 index 30b355a..37dc36d 100644
 --- a/drivers/gpu/drm/i915/i915_gem_context.c
 +++ b/drivers/gpu/drm/i915/i915_gem_context.c
 @@ -614,7 +614,7 @@ mi_set_context(struct intel_ring_buffer *ring,
 if (ret)
 return ret;

 -   /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw */
 +   /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
 if (INTEL_INFO(ring-dev)-gen = 7)
 intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 else
 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
 b/drivers/gpu/drm/i915/intel_ringbuffer.c
 index eb3dd26..b025a51 100644
 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
 +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
 @@ -599,7 +599,7 @@ static int init_render_ring(struct intel_ring_buffer 
 *ring)
  * to use MI_WAIT_FOR_EVENT within the CS. It should already be
  * programmed to '1' on all products.
  *
 -* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw
 +* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
  */
 if (INTEL_INFO(dev)-gen = 6)
 I915_WRITE(MI_MODE, 
 _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
 --
 1.8.3.2




-- 
Paulo Zanoni
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 42/71] drm/i915/chv: Implement WaDisableSamplerPowerBypass for CHV

2014-04-28 Thread Paulo Zanoni
2014-04-28 5:23 GMT-03:00 Ville Syrjälä ville.syrj...@linux.intel.com:
 On Fri, Apr 25, 2014 at 05:55:38PM -0300, Paulo Zanoni wrote:
 2014-04-09 7:28 GMT-03:00  ville.syrj...@linux.intel.com:
  From: Rafael Barbalho rafael.barba...@intel.com
 
  Cherryview also needs this WA.

 At least on the chv_rebase tree, this WA is implemented for BDW but it
 is not documented as pre-prod only, and its name is not there. We
 should probably add a comment documenting the name and the fact that
 it is also pre-prod on BDW.

 IIRC BDW will need it even on production steppings.

Hmmm the register documentation says one thing while the WA lists say
others... I'll let you discover which one is correct :)


 I think I have a patch somewhere that adds the w/a note for BDW, but I guess
 I didn't post it yet.



 
  Signed-off-by: Rafael Barbalho rafael.barba...@intel.com
  [vsyrjala: Looks like it's for pre-production hw only]
  Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com
  ---
   drivers/gpu/drm/i915/intel_pm.c | 4 
   1 file changed, 4 insertions(+)
 
  diff --git a/drivers/gpu/drm/i915/intel_pm.c 
  b/drivers/gpu/drm/i915/intel_pm.c
  index 468fe37..60f876c 100644
  --- a/drivers/gpu/drm/i915/intel_pm.c
  +++ b/drivers/gpu/drm/i915/intel_pm.c
  @@ -5405,6 +5405,10 @@ static void cherryview_init_clock_gating(struct 
  drm_device *dev)
  /* WaDisableSDEUnitClockGating:chv */
  I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
  +
  +   /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
  +   I915_WRITE(HALF_SLICE_CHICKEN3,
  +  _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));

 I could not find information anywhere if this is the correct
 implementation. Can you please provide me pointers to the doc you
 used? The links on Collab seem broken.

 Just w/a database + bspec are enough for this one.

Found it :)

Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com



 Thanks,
 Paulo

   }
 
   static void g4x_init_clock_gating(struct drm_device *dev)
  --
  1.8.3.2
 
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx



 --
 Paulo Zanoni

 --
 Ville Syrjälä
 Intel OTC



-- 
Paulo Zanoni
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Support 64b relocations

2014-04-28 Thread Ben Widawsky
All the rest of the code to enable this is in my branch. Without my
branch, hitting >32b offsets is impossible. The code has always
supported 64b, but it's never actually been run or tested. This change
doesn't actually fix anything. [1] I am not sure why X won't work yet. I
do not get hangs or obvious errors.

There are 3 fixes grouped together here. First is to remove the
hardcoded 0 for the upper dword of the relocation. The next fix is to
use a 64b value for target_offset. The final fix is to not directly
apply target_offset to reloc-delta. reloc-delta is part of ABI, and so
we cannot change it. As it stands, 32b is enough to represent everything
we're interested in representing anyway. The main problem is, we cannot
add greater than 32b values to it directly.

[1] Almost all of intel-gpu-tools is not yet ready to test 64b
relocations. There are a few places that expect 32b values for offsets
and these all won't work.

Cc: Rafael Barbalho rafael.barba...@intel.com
Cc: Chris Wilson ch...@chris-wilson.co.uk
Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0d806fc..6ffecd2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -262,10 +262,12 @@ static inline int use_cpu_reloc(struct 
drm_i915_gem_object *obj)
 
 static int
 relocate_entry_cpu(struct drm_i915_gem_object *obj,
-  struct drm_i915_gem_relocation_entry *reloc)
+  struct drm_i915_gem_relocation_entry *reloc,
+  uint64_t target_offset)
 {
struct drm_device *dev = obj-base.dev;
uint32_t page_offset = offset_in_page(reloc-offset);
+   uint64_t delta = reloc-delta + target_offset;
char *vaddr;
int ret;
 
@@ -275,7 +277,7 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
 
vaddr = kmap_atomic(i915_gem_object_get_page(obj,
reloc-offset  PAGE_SHIFT));
-   *(uint32_t *)(vaddr + page_offset) = reloc-delta;
+   *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
 
if (INTEL_INFO(dev)-gen = 8) {
page_offset = offset_in_page(page_offset + sizeof(uint32_t));
@@ -286,7 +288,7 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
(reloc-offset + sizeof(uint32_t))  PAGE_SHIFT));
}
 
-   *(uint32_t *)(vaddr + page_offset) = 0;
+   *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
}
 
kunmap_atomic(vaddr);
@@ -296,10 +298,12 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
 
 static int
 relocate_entry_gtt(struct drm_i915_gem_object *obj,
-  struct drm_i915_gem_relocation_entry *reloc)
+  struct drm_i915_gem_relocation_entry *reloc,
+  uint64_t target_offset)
 {
struct drm_device *dev = obj-base.dev;
struct drm_i915_private *dev_priv = dev-dev_private;
+   uint64_t delta = reloc-delta + target_offset;
uint32_t __iomem *reloc_entry;
void __iomem *reloc_page;
int ret;
@@ -318,7 +322,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
reloc-offset  PAGE_MASK);
reloc_entry = (uint32_t __iomem *)
(reloc_page + offset_in_page(reloc-offset));
-   iowrite32(reloc-delta, reloc_entry);
+   iowrite32(lower_32_bits(delta), reloc_entry);
 
if (INTEL_INFO(dev)-gen = 8) {
reloc_entry += 1;
@@ -331,7 +335,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
reloc_entry = reloc_page;
}
 
-   iowrite32(0, reloc_entry);
+   iowrite32(upper_32_bits(delta), reloc_entry);
}
 
io_mapping_unmap_atomic(reloc_page);
@@ -348,7 +352,7 @@ i915_gem_execbuffer_relocate_entry(struct 
drm_i915_gem_object *obj,
struct drm_gem_object *target_obj;
struct drm_i915_gem_object *target_i915_obj;
struct i915_vma *target_vma;
-   uint32_t target_offset;
+   uint64_t target_offset;
int ret;
 
/* we've already hold a reference to all valid objects */
@@ -427,11 +431,10 @@ i915_gem_execbuffer_relocate_entry(struct 
drm_i915_gem_object *obj,
if (obj-active  in_atomic())
return -EFAULT;
 
-   reloc-delta += target_offset;
if (use_cpu_reloc(obj))
-   ret = relocate_entry_cpu(obj, reloc);
+   ret = relocate_entry_cpu(obj, reloc, target_offset);
else
-   ret = relocate_entry_gtt(obj, reloc);
+   ret = relocate_entry_gtt(obj, reloc, target_offset);
 
if (ret)
return ret;
-- 
1.9.2

___
Intel-gfx mailing 

Re: [Intel-gfx] [PATCH 2/2] drm/i915: Print captured bo for all VM in error state

2014-04-28 Thread Ben Widawsky
On Sat, Jan 25, 2014 at 08:10:06PM +0100, Daniel Vetter wrote:
 On Fri, Jan 24, 2014 at 12:13:44PM -0800, Ben Widawsky wrote:
  ping
 
 Merged the first patch to topic/ppgtt, but punted on the 2nd - I think
 with Mika's improvement to the guilty batch detection we should be able to
 fix this better. Or what's the consensus here?
 
 Aside: I didn't spot your r-b buried way at the bottom of your mail,
 hence why I didn't apply them.
 -Daniel
 

What happened to this patch or its equivalent?

  
  On Fri, Jan 10, 2014 at 08:08:26PM +, Chris Wilson wrote:
   On Fri, Jan 10, 2014 at 11:59:10AM -0800, Ben Widawsky wrote:
I will gladly re-review if you make any of my suggested changes.
   
   Hmm, I had already done the capture_vma one since that is required to
   display the right addresses in the error state.
   
   The output is like:
   
   vm[0]
 Active [0]:
 Pinned [10]:
   0020 8192 10 00 0 0 P dirty L3+LLC
   00202000 4096 01 01 0 0 P snooped or LLC
   00203000   131072 40 40 0 0 P dirty snooped or LLC
   00223000 4096 01 01 0 0 P snooped or LLC
   00224000 4096 01 01 0 0 P snooped or LLC
   00225000   131072 40 40 0 0 P dirty snooped or LLC
   00245000 4096 01 01 0 0 P snooped or LLC
   00246000   131072 40 40 0 0 P dirty snooped or LLC
   00266000  8294400 41 00 0 0 P uncached
   083f2000 8192 41 00 0 0 P L3+LLC
   vm[1]
 Active [0]:
 Pinned [0]:
   vm[2]
 Active [1]:
    4096 3f 00 f000 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[3]
 Active [1]:
    4096 3f 00 f00c 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[4]
 Active [1]:
    4096 3f 00 f010 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[5]
 Active [1]:
    4096 3f 00 f016 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[6]
 Active [1]:
    4096 3f 00 f019 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[7]
 Active [1]:
    4096 3f 00 f01e 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[8]
 Active [1]:
    4096 3f 00 f017 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[9]
 Active [1]:
    4096 3f 00 f015 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[10]
 Active [1]:
    4096 3f 00 f02b 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[11]
 Active [1]:
    4096 3f 00 f039 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[12]
 Active [1]:
    4096 3f 00 f03c 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[13]
 Active [1]:
    4096 3f 00 f043 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[14]
 Active [1]:
    4096 3f 00 f04d 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[15]
 Active [1]:
    4096 3f 00 f02f 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[16]
 Active [1]:
    4096 3f 00 f053 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[17]
 Active [1]:
    4096 3f 00 f05e 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[18]
 Active [1]:
    4096 3f 00 f059 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[19]
 Active [1]:
    4096 3f 00 f03e 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[20]
 Active [1]:
    4096 3f 00 f067 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[21]
 Active [1]:
    4096 3f 00 f06a 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[22]
 Active [1]:
    4096 3f 00 f068 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[23]
 Active [1]:
    4096 3f 00 f071 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[24]
 Active [1]:
    4096 3f 00 f074 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[25]
 Active [1]:
    4096 3f 00 f077 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[26]
 Active [1]:
    4096 3f 00 f07d 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[27]
 Active [1]:
    4096 3f 00 f07f 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[28]
 Active [1]:
    4096 3f 00 f082 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[29]
 Active [1]:
    4096 3f 00 f085 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[30]
 Active [1]:
    4096 3f 00 f088 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[31]
 Active [1]:
    4096 3f 00 f05c 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[32]
 Active [1]:
    4096 3f 00 f08e 0 dirty bsd snooped or LLC
 Pinned [0]:
   vm[33]
 Active [1]:
    4096 3f 00 f08c 0 dirty bsd snooped or LLC
 

[Intel-gfx] [PATCH] drm/i915: Expand error state's address width to 64b

2014-04-28 Thread Ben Widawsky
Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 drivers/gpu/drm/i915/i915_drv.h   |  4 ++--
 drivers/gpu/drm/i915/i915_gpu_error.c | 16 +---
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 539f16db..cdde849 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -366,7 +366,7 @@ struct drm_i915_error_state {
 
struct drm_i915_error_object {
int page_count;
-   u32 gtt_offset;
+   u64 gtt_offset;
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
@@ -391,7 +391,7 @@ struct drm_i915_error_state {
u32 size;
u32 name;
u32 rseqno, wseqno;
-   u32 gtt_offset;
+   u64 gtt_offset;
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 481a7d1..a5cd3b0 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -194,7 +194,7 @@ static void print_error_buffers(struct 
drm_i915_error_state_buf *m,
err_printf(m, %s [%d]:\n, name, count);
 
while (count--) {
-   err_printf(m,   %08x %8u %02x %02x %x %x,
+   err_printf(m,   %16llx %8u %02x %02x %x %x,
   err-gtt_offset,
   err-size,
   err-read_domains,
@@ -401,7 +401,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf 
*m,
err_printf(m,  (submitted by %s [%d]),
   error-ring[i].comm,
   error-ring[i].pid);
-   err_printf(m,  --- gtt_offset = 0x%08x\n,
+   err_printf(m,  --- gtt_offset = 0x%16llx\n,
   obj-gtt_offset);
print_error_obj(m, obj);
}
@@ -409,7 +409,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf 
*m,
obj = error-ring[i].wa_batchbuffer;
if (obj) {
err_printf(m, %s (w/a) --- gtt_offset = 0x%08x\n,
-  dev_priv-ring[i].name, obj-gtt_offset);
+  dev_priv-ring[i].name,
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
 
@@ -428,14 +429,14 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
if ((obj = error-ring[i].ringbuffer)) {
err_printf(m, %s --- ringbuffer = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
 
if ((obj = error-ring[i].hws_page)) {
err_printf(m, %s --- HW Status = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
offset = 0;
for (elt = 0; elt  PAGE_SIZE/16; elt += 4) {
err_printf(m, [%04x] %08x %08x %08x %08x\n,
@@ -451,14 +452,15 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
if ((obj = error-ring[i].ctx)) {
err_printf(m, %s --- HW Context = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
}
 
obj = error-semaphore_obj;
if (obj) {
-   err_printf(m, Semaphore page = 0x%08x\n, obj-gtt_offset);
+   err_printf(m, Semaphore page = 0x%08x\n,
+  lower_32_bits(obj-gtt_offset));
for (elt = 0; elt  PAGE_SIZE/16; elt += 4) {
err_printf(m, [%04x] %08x %08x %08x %08x\n,
   elt * 4,
-- 
1.9.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] [v2] drm/i915: Expand error state's address width to 64b

2014-04-28 Thread Ben Widawsky
v2: 0 pad the new 8B fields or else intel_error_decode has a hard time.
Note, regardless we need an igt update.

Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 drivers/gpu/drm/i915/i915_drv.h   |  4 ++--
 drivers/gpu/drm/i915/i915_gpu_error.c | 16 +---
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 539f16db..cdde849 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -366,7 +366,7 @@ struct drm_i915_error_state {
 
struct drm_i915_error_object {
int page_count;
-   u32 gtt_offset;
+   u64 gtt_offset;
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
@@ -391,7 +391,7 @@ struct drm_i915_error_state {
u32 size;
u32 name;
u32 rseqno, wseqno;
-   u32 gtt_offset;
+   u64 gtt_offset;
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 481a7d1..881ad8f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -194,7 +194,7 @@ static void print_error_buffers(struct 
drm_i915_error_state_buf *m,
err_printf(m, %s [%d]:\n, name, count);
 
while (count--) {
-   err_printf(m,   %08x %8u %02x %02x %x %x,
+   err_printf(m,   %016llx %8u %02x %02x %x %x,
   err-gtt_offset,
   err-size,
   err-read_domains,
@@ -401,7 +401,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf 
*m,
err_printf(m,  (submitted by %s [%d]),
   error-ring[i].comm,
   error-ring[i].pid);
-   err_printf(m,  --- gtt_offset = 0x%08x\n,
+   err_printf(m,  --- gtt_offset = 0x%016llx\n,
   obj-gtt_offset);
print_error_obj(m, obj);
}
@@ -409,7 +409,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf 
*m,
obj = error-ring[i].wa_batchbuffer;
if (obj) {
err_printf(m, %s (w/a) --- gtt_offset = 0x%08x\n,
-  dev_priv-ring[i].name, obj-gtt_offset);
+  dev_priv-ring[i].name,
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
 
@@ -428,14 +429,14 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
if ((obj = error-ring[i].ringbuffer)) {
err_printf(m, %s --- ringbuffer = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
 
if ((obj = error-ring[i].hws_page)) {
err_printf(m, %s --- HW Status = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
offset = 0;
for (elt = 0; elt  PAGE_SIZE/16; elt += 4) {
err_printf(m, [%04x] %08x %08x %08x %08x\n,
@@ -451,14 +452,15 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
if ((obj = error-ring[i].ctx)) {
err_printf(m, %s --- HW Context = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
}
 
obj = error-semaphore_obj;
if (obj) {
-   err_printf(m, Semaphore page = 0x%08x\n, obj-gtt_offset);
+   err_printf(m, Semaphore page = 0x%08x\n,
+  lower_32_bits(obj-gtt_offset));
for (elt = 0; elt  PAGE_SIZE/16; elt += 4) {
err_printf(m, [%04x] %08x %08x %08x %08x\n,
   elt * 4,
-- 
1.9.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] intel_error_decode: use 64b gtt_offset

2014-04-28 Thread Ben Widawsky
See the relevant kernel patch for the details. I guess this breaks
support for older error state, I am not actually sure. Without
versioning our error state though, I cannot think of a better way.
Suggestions are welcome.

Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 tools/intel_error_decode.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/intel_error_decode.c b/tools/intel_error_decode.c
index 1eeff07..d0028a1 100644
--- a/tools/intel_error_decode.c
+++ b/tools/intel_error_decode.c
@@ -311,17 +311,17 @@ print_fence(unsigned int devid, uint64_t fence)
 uint32_t head[MAX_RINGS];
 int head_ndx = 0;
 int num_rings = 0;
-static void print_batch(int is_batch, const char *ring_name, uint32_t 
gtt_offset)
+static void print_batch(int is_batch, const char *ring_name, uint64_t 
gtt_offset)
 {
const char *buffer_type[2] = {  ringbuffer, batchbuffer };
if (is_batch || !num_rings)
-   printf(%s (%s) at 0x%08x\n, buffer_type[is_batch], ring_name, 
gtt_offset);
+   printf(%s (%s) at 0x%016lx\n, buffer_type[is_batch], 
ring_name, gtt_offset);
else
-   printf(%s (%s) at 0x%08x; HEAD points to: 0x%08x\n, 
buffer_type[is_batch], ring_name, gtt_offset, head[head_ndx++ % num_rings] + 
gtt_offset);
+   printf(%s (%s) at 0x%016lx; HEAD points to: 0x%016lx\n, 
buffer_type[is_batch], ring_name, gtt_offset, head[head_ndx++ % num_rings] + 
gtt_offset);
 }
 
 static void decode(struct drm_intel_decode *ctx, bool is_batch,
-  const char *ring_name, uint32_t gtt_offset, uint32_t *data,
+  const char *ring_name, uint64_t gtt_offset, uint32_t *data,
   int *count)
 {
if (!*count)
@@ -344,7 +344,7 @@ read_data_file(FILE *file)
char *line = NULL;
size_t line_size;
uint32_t offset, value, ring_length = 0;
-   uint32_t gtt_offset = 0, new_gtt_offset;
+   uint64_t gtt_offset = 0, new_gtt_offset;
char *ring_name = NULL;
int is_batch = 1;
 
@@ -361,7 +361,7 @@ read_data_file(FILE *file)
if (num_rings == -1)
num_rings = head_ndx;
 
-   matched = sscanf(dashes, --- gtt_offset = 0x%08x\n,
+   matched = sscanf(dashes, --- gtt_offset = 0x%016lx\n,
new_gtt_offset);
if (matched == 1) {
decode(decode_ctx, is_batch, ring_name,
@@ -373,7 +373,7 @@ read_data_file(FILE *file)
continue;
}
 
-   matched = sscanf(dashes, --- ringbuffer = 0x%08x\n,
+   matched = sscanf(dashes, --- ringbuffer = 0x%08lx\n,
new_gtt_offset);
if (matched == 1) {
decode(decode_ctx, is_batch, ring_name,
-- 
1.9.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2] drm/i915: Debugfs disable RPS boost and idle

2014-04-28 Thread Ben Widawsky
On Mon, Apr 28, 2014 at 01:53:52PM -0700, Daisy Sun wrote:
 RP frequency request is affected by 2 modules: normal turbo
 algorithm and RPS boost algorithm. By adding RPS boost algorithm
 to the mix, the final frequency becomes relatively unpredictable.
 Add a switch to enable/disable RPS boost functionality. When
 disabled, RP frequency will follow the normal turbo algorithm only.
 
 Intention: when boost and idle are disabled, we have a clear view
 of the turbo algorithm. It's very helpful to verify whether the turbo
 algorithm is working as expected.
 Without debugfs hooks, RPS boost or idle may kick in at
 any time and under any circumstances.
 
 V1->V2: Follow Daniel's comment to explain the intention.
 
 Signed-off-by: Daisy Sun daisy@intel.com
 ---
  drivers/gpu/drm/i915/i915_debugfs.c | 40 
 +
  drivers/gpu/drm/i915/i915_drv.h |  1 +
  drivers/gpu/drm/i915/intel_pm.c |  8 ++--
  3 files changed, 47 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
 b/drivers/gpu/drm/i915/i915_debugfs.c
 index 1e83ae4..ff71214 100644
 --- a/drivers/gpu/drm/i915/i915_debugfs.c
 +++ b/drivers/gpu/drm/i915/i915_debugfs.c
 @@ -3486,6 +3486,45 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
   i915_drop_caches_get, i915_drop_caches_set,
   0x%08llx\n);
  
 +static int i915_rps_disable_boost_get(void *data, u64 *val)
 +{
 + struct drm_device *dev = data;
 + struct drm_i915_private *dev_priv = dev-dev_private;
 +
 + if (INTEL_INFO(dev)-gen  6)
 + return -ENODEV;
 +
 + *val = dev_priv-rps.debugfs_disable_boost;
 +
 + return 0;
 +}
 +
 +static int i915_rps_disable_boost_set(void *data, u64 val)
 +{
 + struct drm_device *dev = data;
 + struct drm_i915_private *dev_priv = dev-dev_private;
 + int ret;
 +
 + flush_delayed_work(dev_priv-rps.delayed_resume_work);

I'm not really sure why you feel it's necessary to flush the workqueue here.
Note that you have no real safety since you cannot acquire the lock, and
another event can get queued up after the flush. In other words,
whatever you're trying to do can probably fail.

Also note that without this, a simple atomic_t would suffice for
debugfs_disable_boost.

 +
 + DRM_DEBUG_DRIVER(Setting RPS disable Boost-Idle mode to %s\n,
 +  val ? on : off);
 +
 + ret = mutex_lock_interruptible(dev_priv-rps.hw_lock);
 + if (ret)
 + return ret;
 +
 + dev_priv-rps.debugfs_disable_boost = val;
 +
 + mutex_unlock(dev_priv-rps.hw_lock);
 +
 + return 0;
 +}
 +
 +DEFINE_SIMPLE_ATTRIBUTE(i915_rps_disable_boost_fops,
 + i915_rps_disable_boost_get, i915_rps_disable_boost_set,
 + %llu\n);
 +
  static int
  i915_max_freq_get(void *data, u64 *val)
  {
 @@ -3821,6 +3860,7 @@ static const struct i915_debugfs_files {
   {i915_wedged, i915_wedged_fops},
   {i915_max_freq, i915_max_freq_fops},
   {i915_min_freq, i915_min_freq_fops},
 + {i915_rps_disable_boost, i915_rps_disable_boost_fops},
   {i915_cache_sharing, i915_cache_sharing_fops},
   {i915_ring_stop, i915_ring_stop_fops},
   {i915_ring_missed_irq, i915_ring_missed_irq_fops},
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 272aa7a..9c427da 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -847,6 +847,7 @@ struct intel_gen6_power_mgmt {
   int last_adj;
   enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
  
 + bool debugfs_disable_boost;
   bool enabled;
   struct delayed_work delayed_resume_work;
  
 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
 index 75c1c76..6acac14 100644
 --- a/drivers/gpu/drm/i915/intel_pm.c
 +++ b/drivers/gpu/drm/i915/intel_pm.c
 @@ -3163,7 +3163,9 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
   struct drm_device *dev = dev_priv-dev;
  
   mutex_lock(dev_priv-rps.hw_lock);
 - if (dev_priv-rps.enabled) {
 +
 + if (dev_priv-rps.enabled
 +  !dev_priv-rps.debugfs_disable_boost) {
   if (IS_VALLEYVIEW(dev))
   vlv_set_rps_idle(dev_priv);
   else
 @@ -3178,7 +3180,9 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv)
   struct drm_device *dev = dev_priv-dev;
  
   mutex_lock(dev_priv-rps.hw_lock);
 - if (dev_priv-rps.enabled) {
 +
 + if (dev_priv-rps.enabled
 +  !dev_priv-rps.debugfs_disable_boost) {
   if (IS_VALLEYVIEW(dev))
   valleyview_set_rps(dev_priv-dev, 
 dev_priv-rps.max_freq_softlimit);
   else
 -- 
 1.9.1
 
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ben Widawsky, Intel Open Source Technology Center

[Intel-gfx] [PATCH] drm/i915: Support 64b execbuf

2014-04-28 Thread Ben Widawsky
Previously, our code only had a 32b offset value for where the
batchbuffer starts. With full PPGTT and a 64b canonical GPU address
space, that is an insufficient value. The code to expand it is pretty
straightforward, and only one platform needs to do anything with the
extra bits.

Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c| 16 
 drivers/gpu/drm/i915/intel_ringbuffer.h|  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6ffecd2..f5f0b92 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1017,7 +1017,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct i915_hw_context *ctx;
struct i915_address_space *vm;
const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
-   u32 exec_start = args-batch_start_offset, exec_len;
+   u64 exec_start = args-batch_start_offset, exec_len;
u32 mask, flags;
int ret, mode, i;
bool need_relocs;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a42942f..bbe989f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1324,7 +1324,7 @@ gen8_ring_put_irq(struct intel_ring_buffer *ring)
 
 static int
 i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
-u32 offset, u32 length,
+u64 offset, u32 length,
 unsigned flags)
 {
int ret;
@@ -1347,7 +1347,7 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
 #define I830_BATCH_LIMIT (256*1024)
 static int
 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
-   u32 offset, u32 len,
+   u64 offset, u32 len,
unsigned flags)
 {
int ret;
@@ -1398,7 +1398,7 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
 
 static int
 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
-u32 offset, u32 len,
+u64 offset, u32 len,
 unsigned flags)
 {
int ret;
@@ -1943,7 +1943,7 @@ static int gen6_bsd_ring_flush(struct intel_ring_buffer 
*ring,
 
 static int
 gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
- u32 offset, u32 len,
+ u64 offset, u32 len,
  unsigned flags)
 {
struct drm_i915_private *dev_priv = ring-dev-dev_private;
@@ -1957,8 +1957,8 @@ gen8_ring_dispatch_execbuffer(struct intel_ring_buffer 
*ring,
 
/* FIXME(BDW): Address space and security selectors. */
intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt8));
-   intel_ring_emit(ring, offset);
-   intel_ring_emit(ring, 0);
+   intel_ring_emit(ring, lower_32_bits(offset));
+   intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
 
@@ -1967,7 +1967,7 @@ gen8_ring_dispatch_execbuffer(struct intel_ring_buffer 
*ring,
 
 static int
 hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
- u32 offset, u32 len,
+ u64 offset, u32 len,
  unsigned flags)
 {
int ret;
@@ -1988,7 +1988,7 @@ hsw_ring_dispatch_execbuffer(struct intel_ring_buffer 
*ring,
 
 static int
 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
- u32 offset, u32 len,
+ u64 offset, u32 len,
  unsigned flags)
 {
int ret;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index dbdce5f..cb55cff 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -135,7 +135,7 @@ struct  intel_ring_buffer {
void(*set_seqno)(struct intel_ring_buffer *ring,
 u32 seqno);
int (*dispatch_execbuffer)(struct intel_ring_buffer *ring,
-  u32 offset, u32 length,
+  u64 offset, u32 length,
   unsigned flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
-- 
1.9.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx