[Intel-gfx] [PATCH] drm/i915: bdw: fix RC6 enabled status reporting and disable runtime PM
On BDW we don't enable RC6 at the moment, but this isn't reflected in the (sanitized) i915.enable_rc6 option. So make enable_rc6 report correctly that RC6 is disabled, which will also effectively disable RPM on BDW (since RPM depends on RC6). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77565 Signed-off-by: Imre Deak imre.d...@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d49ec02..19020e5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3260,6 +3260,10 @@ static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) if (INTEL_INFO(dev)-gen == 5 !IS_IRONLAKE_M(dev)) return 0; + /* Disable RC6 on Broadwell for now */ + if (IS_BROADWELL(dev)) + return 0; + /* Respect the kernel parameter if it is set */ if (enable_rc6 = 0) { int mask; -- 1.8.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC] tests/gem_bo_falloc: New igt for testing gem_fallocate() ioctl
From: Siluvery, Arun arun.siluv...@intel.com This ioctl allows vary the effective size of the gem object. User can mark certain range in object space as scratch thus effectively modifying the size used. v2: modify subtest names and function names as per tooling convention. Signed-off-by: Siluvery, Arun arun.siluv...@intel.com --- tests/Makefile.sources | 1 + tests/gem_bo_falloc.c | 471 + 2 files changed, 472 insertions(+) create mode 100644 tests/gem_bo_falloc.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 88866ac..25c010e 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -120,6 +120,7 @@ TESTS_progs = \ gem_unref_active_buffers \ gem_vmap_blits \ gem_wait_render_timeout \ + gem_bo_falloc \ gen3_mixed_blits \ gen3_render_linear_blits \ gen3_render_mixed_blits \ diff --git a/tests/gem_bo_falloc.c b/tests/gem_bo_falloc.c new file mode 100644 index 000..d6b7f10 --- /dev/null +++ b/tests/gem_bo_falloc.c @@ -0,0 +1,471 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * + */ + +#include unistd.h +#include stdlib.h +#include stdio.h +#include string.h +#include fcntl.h +#include inttypes.h +#include errno.h +#include sys/stat.h +#include sys/ioctl.h +#include drm.h +#include i915_drm.h +#include drmtest.h +#include intel_chipset.h +#include intel_gpu_tools.h + +#define OBJECT_SIZE (8 * PAGE_SIZE) +#define COPY_BLT_CMD (229|0x5322|0x6) +#define BLT_WRITE_ALPHA(121) +#define BLT_WRITE_RGB (120) +#define BLT_SRC_TILED (115) +#define BLT_DST_TILED (111) + +static uint8_t buf[OBJECT_SIZE]; + +static uint32_t create_bo(int fd) +{ + int i; + uint32_t page_count; + uint32_t handle; + + handle = gem_create(fd, sizeof(buf)); + page_count = sizeof(buf) / PAGE_SIZE; + + for (i = 0; i page_count; ++i) + memset(buf + (i * PAGE_SIZE), i+1, PAGE_SIZE); + + gem_write(fd, handle, 0, buf, sizeof(buf)); + return handle; +} + +static int gem_linear_blt(int fd, + uint32_t *batch, + uint32_t src, + uint32_t dst, + uint32_t length, + struct drm_i915_gem_relocation_entry *reloc) +{ + uint32_t *b = batch; + int height = length / (16 * 1024); + + igt_assert(height = 116); + + if (height) { + int i = 0; + b[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB; + if (intel_gen(intel_get_drm_devid(fd)) = 8) + b[i-1]+=2; + b[i++] = 0xcc 16 | 1 25 | 1 24 | (16*1024); + b[i++] = 0; + b[i++] = height 16 | (4*1024); + b[i++] = 0; + reloc-offset = (b-batch+4) * sizeof(uint32_t); + reloc-delta = 0; + reloc-target_handle = dst; + reloc-read_domains = I915_GEM_DOMAIN_RENDER; + reloc-write_domain = I915_GEM_DOMAIN_RENDER; + reloc-presumed_offset = 0; + reloc++; + if (intel_gen(intel_get_drm_devid(fd)) = 8) + b[i++] = 0; /* FIXME */ + + b[i++] = 0; + b[i++] = 16*1024; + b[i++] = 0; + 
reloc-offset = (b-batch+7) * sizeof(uint32_t); + if (intel_gen(intel_get_drm_devid(fd)) = 8) + reloc-offset += sizeof(uint32_t); + reloc-delta = 0; + reloc-target_handle = src; + reloc-read_domains = I915_GEM_DOMAIN_RENDER; + reloc-write_domain = 0; + reloc-presumed_offset = 0; + reloc++; + if
[Intel-gfx] [PATCH for stable 3.14 only 1/1] drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE
This reverts the bisected regressing commit bc0bb9fd1c7810407ab810d204bbaecb255fddde Author: Jani Nikula jani.nik...@intel.com Date: Thu Nov 14 12:14:29 2013 +0200 drm/i915: remove QUIRK_NO_PCH_PWM_ENABLE restoring QUIRK_NO_PCH_PWM_ENABLE for a couple of Dell XPS models which broke in 3.14. There is no such revert upstream. We have root caused and fixed the issue upstream, without the quirk, with: commit 39fbc9c8f6765959b55e0b127dd5c57df5a47d67 Author: Jani Nikula jani.nik...@intel.com Date: Wed Apr 9 11:22:06 2014 +0300 drm/i915: check VBT for supported backlight type and commit c675949ec58ca50d5a3ae3c757892f1560f6e896 Author: Jani Nikula jani.nik...@intel.com Date: Wed Apr 9 11:31:37 2014 +0300 drm/i915: do not setup backlight if not available according to VBT While the commits are within the stable rules otherwise, and fix more machines than just the regressed Dell XPS models, we feel backporting them to stable may be too risky. The revert is limited to the broken machines, and the impact should be effectively the same as what the upstream commits do more generally. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76276 Reported-by: Romain Francoise rom...@orebokech.com CC: Kamal Mostafa ka...@canonical.com CC: Daniel Vetter dan...@ffwll.ch CC: sta...@vger.kernel.org (3.14 only) Signed-off-by: Jani Nikula jani.nik...@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 16 drivers/gpu/drm/i915/intel_panel.c | 4 3 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index df77e20e3c3d..697f2150a997 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -734,6 +734,7 @@ enum intel_sbi_destination { #define QUIRK_PIPEA_FORCE (10) #define QUIRK_LVDS_SSC_DISABLE (11) #define QUIRK_INVERT_BRIGHTNESS (12) +#define QUIRK_NO_PCH_PWM_ENABLE (13) struct intel_fbdev; struct intel_fbc_work; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9b8a7c7ea7fc..963639d9049b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10771,6 +10771,17 @@ static void quirk_invert_brightness(struct drm_device *dev) DRM_INFO(applying inverted panel brightness quirk\n); } +/* + * Some machines (Dell XPS13) suffer broken backlight controls if + * BLM_PCH_PWM_ENABLE is set. 
+ */ +static void quirk_no_pcm_pwm_enable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + dev_priv-quirks |= QUIRK_NO_PCH_PWM_ENABLE; + DRM_INFO(applying no-PCH_PWM_ENABLE quirk\n); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -10839,6 +10850,11 @@ static struct intel_quirk intel_quirks[] = { /* Acer Aspire 4736Z */ { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, + + /* Dell XPS13 HD Sandy Bridge */ + { 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable }, + /* Dell XPS13 HD and XPS13 FHD Ivy Bridge */ + { 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable }, }; static void intel_init_quirks(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 079ea38f14d9..9f1d7a9300e8 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -671,6 +671,10 @@ static void pch_enable_backlight(struct intel_connector *connector) pch_ctl2 = panel-backlight.max 16; I915_WRITE(BLC_PWM_PCH_CTL2, pch_ctl2); + /* XXX: transitional */ + if (dev_priv-quirks QUIRK_NO_PCH_PWM_ENABLE) + return; + pch_ctl1 = 0; if (panel-backlight.active_low_pwm) pch_ctl1 |= BLM_PCH_POLARITY; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH for stable 3.14 only 0/1] drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE
Stable team - I'd like to hear your opinions on this one. It reverts a commit that regressed in 3.14, but the revert does not exist upstream. Instead we've root caused the issue and provided a real fix for upstream, but we're hesitant to backport that to stable. Functionally the effect of the revert is similar to the real fix, but only impacts a few models, while the real fix has much broader scope. See the commit message for details. Romain, Kamal, I'd appreciate it if you could provide your tested-by with this on top of 3.14.2 on the failing Dell XPS models. Thanks. BR, Jani. Jani Nikula (1): drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 16 drivers/gpu/drm/i915/intel_panel.c | 4 3 files changed, 21 insertions(+) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH for stable 3.14 only 1/1] drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE
Jani Nikula jani.nik...@intel.com writes: This reverts the bisected regressing commit bc0bb9fd1c7810407ab810d204bbaecb255fddde Author: Jani Nikula jani.nik...@intel.com Date: Thu Nov 14 12:14:29 2013 +0200 drm/i915: remove QUIRK_NO_PCH_PWM_ENABLE restoring QUIRK_NO_PCH_PWM_ENABLE for a couple of Dell XPS models which broke in 3.14. I've been running with this revert since v3.14-rc (and now v3.14.2), so: Tested-by: Romain Francoise rom...@orebokech.com ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 49/71] drm/i915/chv: Add CHV display support
From: Rafael Barbalho rafael.barba...@intel.com Add support for the third pipe in cherrview v2: Don't use spaces for indentation (Jani) Wrap long lines Reviewed-by: Imre Deak imre.d...@intel.com Signed-off-by: Rafael Barbalho rafael.barba...@intel.com [vsyrjala: slightly massaged the patch] Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.c | 12 drivers/gpu/drm/i915/i915_reg.h | 11 --- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3f57237..0fd3046 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -49,6 +49,17 @@ static struct drm_driver driver; .dpll_md_offsets = { DPLL_A_MD_OFFSET, DPLL_B_MD_OFFSET }, \ .palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET } +#define GEN_CHV_PIPEOFFSETS \ + .pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \ + CHV_PIPE_C_OFFSET }, \ + .trans_offsets = { TRANSCODER_A_OFFSET, TRANSCODER_B_OFFSET, \ + CHV_TRANSCODER_C_OFFSET, }, \ + .dpll_offsets = { DPLL_A_OFFSET, DPLL_B_OFFSET, \ + CHV_DPLL_C_OFFSET }, \ + .dpll_md_offsets = { DPLL_A_MD_OFFSET, DPLL_B_MD_OFFSET, \ +CHV_DPLL_C_MD_OFFSET }, \ + .palette_offsets = { PALETTE_A_OFFSET, PALETTE_B_OFFSET, \ +CHV_PALETTE_C_OFFSET } static const struct intel_device_info intel_i830_info = { .gen = 2, .is_mobile = 1, .cursor_needs_physical = 1, .num_pipes = 2, @@ -286,6 +297,7 @@ static const struct intel_device_info intel_cherryview_info = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, .is_valleyview = 1, .display_mmio_offset = VLV_DISPLAY_BASE, + GEN_CHV_PIPEOFFSETS, }; /* diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 74ac1c2..9138eff 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1440,6 +1440,7 @@ enum punit_power_well { */ #define DPLL_A_OFFSET 0x6014 #define DPLL_B_OFFSET 0x6018 +#define CHV_DPLL_C_OFFSET 0x6030 #define 
DPLL(pipe) (dev_priv-info.dpll_offsets[pipe] + \ dev_priv-info.display_mmio_offset) @@ -1531,6 +1532,7 @@ enum punit_power_well { #define DPLL_A_MD_OFFSET 0x601c /* 965+ only */ #define DPLL_B_MD_OFFSET 0x6020 /* 965+ only */ +#define CHV_DPLL_C_MD_OFFSET 0x603c #define DPLL_MD(pipe) (dev_priv-info.dpll_md_offsets[pipe] + \ dev_priv-info.display_mmio_offset) @@ -1727,6 +1729,7 @@ enum punit_power_well { */ #define PALETTE_A_OFFSET 0xa000 #define PALETTE_B_OFFSET 0xa800 +#define CHV_PALETTE_C_OFFSET 0xc000 #define PALETTE(pipe) (dev_priv-info.palette_offsets[pipe] + \ dev_priv-info.display_mmio_offset) @@ -2216,6 +2219,7 @@ enum punit_power_well { #define TRANSCODER_A_OFFSET 0x6 #define TRANSCODER_B_OFFSET 0x61000 #define TRANSCODER_C_OFFSET 0x62000 +#define CHV_TRANSCODER_C_OFFSET 0x63000 #define TRANSCODER_EDP_OFFSET 0x6f000 #define _TRANSCODER2(pipe, reg) (dev_priv-info.trans_offsets[(pipe)] - \ @@ -3543,9 +3547,10 @@ enum punit_power_well { #define PIPESTAT_INT_ENABLE_MASK 0x7fff #define PIPESTAT_INT_STATUS_MASK 0x -#define PIPE_A_OFFSET 0x7 -#define PIPE_B_OFFSET 0x71000 -#define PIPE_C_OFFSET 0x72000 +#define PIPE_A_OFFSET 0x7 +#define PIPE_B_OFFSET 0x71000 +#define PIPE_C_OFFSET 0x72000 +#define CHV_PIPE_C_OFFSET 0x74000 /* * There's actually no pipe EDP. Some pipe registers have * simply shifted from the pipe to the transcoder, while -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 53/71] drm/i915/chv: Configure crtc_mask correctly for CHV
From: Ville Syrjälä ville.syrj...@linux.intel.com On CHV pipe C can drive only port D, and pipes A and B can drive only ports B and C. Configure the crtc_mask appropriately to reflect that. v2: Moar braces (Jani) Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/intel_dp.c | 9 - drivers/gpu/drm/i915/intel_hdmi.c | 9 - 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 27e0c86..a3cb9d8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4079,7 +4079,14 @@ intel_dp_init(struct drm_device *dev, int output_reg, enum port port) intel_dig_port-dp.output_reg = output_reg; intel_encoder-type = INTEL_OUTPUT_DISPLAYPORT; - intel_encoder-crtc_mask = (1 0) | (1 1) | (1 2); + if (IS_CHERRYVIEW(dev)) { + if (port == PORT_D) + intel_encoder-crtc_mask = 1 2; + else + intel_encoder-crtc_mask = (1 0) | (1 1); + } else { + intel_encoder-crtc_mask = (1 0) | (1 1) | (1 2); + } intel_encoder-cloneable = 0; intel_encoder-hot_plug = intel_dp_hot_plug; diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 1e8d2a9..d4e020e 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1452,7 +1452,14 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port) } intel_encoder-type = INTEL_OUTPUT_HDMI; - intel_encoder-crtc_mask = (1 0) | (1 1) | (1 2); + if (IS_CHERRYVIEW(dev)) { + if (port == PORT_D) + intel_encoder-crtc_mask = 1 2; + else + intel_encoder-crtc_mask = (1 0) | (1 1); + } else { + intel_encoder-crtc_mask = (1 0) | (1 1) | (1 2); + } intel_encoder-cloneable = 1 INTEL_OUTPUT_ANALOG; /* * BSpec is unclear about HDMI+HDMI cloning on g4x, but it seems -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 63/71] drm/i915/chv: Set soft reset override bit for data lane resets
From: Ville Syrjälä ville.syrj...@linux.intel.com The bits we've been setting so far only propagate the reset signal to the data lanes. To actually force the reset signal we need to set another override bit. v2: Fix misplaced ';' (Mika) Reviewed-by: Mika Kuoppala mika.kuopp...@intel.com Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_dp.c | 8 drivers/gpu/drm/i915/intel_hdmi.c | 8 3 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 206d600..3c2c8b1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -665,6 +665,7 @@ enum punit_power_well { #define _VLV_PCS_DW1_CH0 0x8204 #define _VLV_PCS_DW1_CH1 0x8404 +#define CHV_PCS_REQ_SOFTRESET_EN (123) #define DPIO_PCS_CLK_CRI_RXEB_EIOS_EN(122) #define DPIO_PCS_CLK_CRI_RXDIGFILTSG_EN (121) #define DPIO_PCS_CLK_DATAWIDTH_SHIFT (6) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 23a8b21..811e1e8 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1854,6 +1854,10 @@ static void chv_post_disable_dp(struct intel_encoder *encoder) mutex_lock(dev_priv-dpio_lock); /* Propagate soft reset to data lane reset */ + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW1(ch)); + val |= CHV_PCS_REQ_SOFTRESET_EN; + vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(ch), val); + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW0(ch)); val = ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(ch), val); @@ -1988,6 +1992,10 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder) mutex_lock(dev_priv-dpio_lock); /* Deassert soft data lane reset*/ + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW1(ch)); + val |= CHV_PCS_REQ_SOFTRESET_EN; + vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(ch), val); + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW0(ch)); val |= 
(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(ch), val); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 6d86bde..e04b1ae 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1241,6 +1241,10 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder) mutex_lock(dev_priv-dpio_lock); /* Propagate soft reset to data lane reset */ + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW1(ch)); + val |= CHV_PCS_REQ_SOFTRESET_EN; + vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(ch), val); + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW0(ch)); val = ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(ch), val); @@ -1263,6 +1267,10 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) mutex_lock(dev_priv-dpio_lock); /* Deassert soft data lane reset*/ + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW1(ch)); + val |= CHV_PCS_REQ_SOFTRESET_EN; + vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(ch), val); + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS_DW0(ch)); val |= (DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(ch), val); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 41/71] drm/i915/chv: Add some workaround notes
On Fri, Apr 25, 2014 at 05:43:55PM -0300, Paulo Zanoni wrote: 2014-04-09 7:28 GMT-03:00 ville.syrj...@linux.intel.com: From: Ville Syrjälä ville.syrj...@linux.intel.com We implement the following workarounds: * WaDisableAsyncFlipPerfMode:chv * WaDisableSemaphoreAndSyncFlipWait:chv (at least partially) In the rebased version (on your gitorious tree, chv_rebase branch), the chunk for this WA got removed. I don't know if this was an accident or not. We need to, at least, fix the commit message. Yeah I misread the spec and though that the idle msg disable bit is there for all rings. But after rechecking I noticed that it was only valid for the render ring. I'll resend this patch with the WaDisableSemaphoreAndSyncFlipWait comment dropped. * WaProgramMiArbOnOffAroundMiSetContext:chv Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 84a7171..a9c33ec 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4376,6 +4376,7 @@ static int i915_gem_init_rings(struct drm_device *dev) struct intel_ring_buffer *ring; int i; + /* WaDisableSemaphoreAndSyncFlipWait:chv */ for_each_ring(ring, dev_priv, i) I915_WRITE(RING_RC_PSMI_CONTROL(ring), _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 28a2b15..142df90 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -606,7 +606,7 @@ mi_set_context(struct intel_ring_buffer *ring, if (ret) return ret; - /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw */ + /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ Do we really need this WA for BDW and CHV? 
I couldn't find them on my docs for gen8... It's listed in bspec. Thanks, Paulo if (INTEL_INFO(ring-dev)-gen = 7) intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); else diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 913b8ab..24022c5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -581,7 +581,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) * to use MI_WAIT_FOR_EVENT within the CS. It should already be * programmed to '1' on all products. * -* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw +* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv */ if (INTEL_INFO(dev)-gen = 6) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Paulo Zanoni -- Ville Syrjälä Intel OTC ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 41/71] drm/i915/chv: Add some workaround notes
From: Ville Syrjälä ville.syrj...@linux.intel.com We implement the following workarounds: * WaDisableAsyncFlipPerfMode:chv * WaProgramMiArbOnOffAroundMiSetContext:chv v2: Drop WaDisableSemaphoreAndSyncFlipWait note Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 30b355a..37dc36d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -614,7 +614,7 @@ mi_set_context(struct intel_ring_buffer *ring, if (ret) return ret; - /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw */ + /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_INFO(ring-dev)-gen = 7) intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); else diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index eb3dd26..b025a51 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -599,7 +599,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) * to use MI_WAIT_FOR_EVENT within the CS. It should already be * programmed to '1' on all products. * -* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw +* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv */ if (INTEL_INFO(dev)-gen = 6) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 05.1/24] drm/i915: Make sure computed watermarks never overflow the registers
From: Ville Syrjälä ville.syrj...@linux.intel.com When we calculate the watermarks for a pipe make sure we leave any level fully zeroed out if it would exceed any of the maximum values that fit in the registers. This will be important later when we start to use also disabled watermark levels during LP1+ merging. Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 43 ++--- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f061ef1..c722acb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1921,6 +1921,16 @@ static void ilk_compute_wm_maximums(const struct drm_device *dev, max-fbc = ilk_fbc_wm_reg_max(dev); } +static void ilk_compute_wm_reg_maximums(struct drm_device *dev, + int level, + struct ilk_wm_maximums *max) +{ + max-pri = ilk_plane_wm_reg_max(dev, level, false); + max-spr = ilk_plane_wm_reg_max(dev, level, true); + max-cur = ilk_cursor_wm_reg_max(dev, level); + max-fbc = ilk_fbc_wm_reg_max(dev); +} + static bool ilk_validate_wm_level(int level, const struct ilk_wm_maximums *max, struct intel_wm_level *result) @@ -2178,9 +2188,6 @@ static bool intel_compute_pipe_wm(struct drm_crtc *crtc, }; struct ilk_wm_maximums max; - /* LP0 watermarks always use 1/2 DDB partitioning */ - ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, max); - pipe_wm-pipe_enabled = params-active; pipe_wm-sprites_enabled = params-spr.enabled; pipe_wm-sprites_scaled = params-spr.scaled; @@ -2193,15 +2200,37 @@ static bool intel_compute_pipe_wm(struct drm_crtc *crtc, if (params-spr.scaled) max_level = 0; - for (level = 0; level = max_level; level++) - ilk_compute_wm_level(dev_priv, level, params, -pipe_wm-wm[level]); + ilk_compute_wm_level(dev_priv, 0, params, pipe_wm-wm[0]); if (IS_HASWELL(dev) || IS_BROADWELL(dev)) pipe_wm-linetime = hsw_compute_linetime_wm(dev, crtc); + /* LP0 watermarks always use 1/2 DDB 
partitioning */ + ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, max); + /* At least LP0 must be valid */ - return ilk_validate_wm_level(0, max, pipe_wm-wm[0]); + if (!ilk_validate_wm_level(0, max, pipe_wm-wm[0])) + return false; + + ilk_compute_wm_reg_maximums(dev, 1, max); + + for (level = 1; level = max_level; level++) { + struct intel_wm_level wm = {}; + + ilk_compute_wm_level(dev_priv, level, params, wm); + + /* +* Disable any watermark level that exceeds the +* register maximums since such watermarks are +* always invalid. +*/ + if (!ilk_validate_wm_level(level, max, wm)) + break; + + pipe_wm-wm[level] = wm; + } + + return true; } /* -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 05.2/24] drm/i915: Merge LP1+ watermarks in safer way
From: Ville Syrjälä ville.syrj...@linux.intel.com On ILK when we disable a particular watermark level, we must maintain the actual watermark values for that level for some time (until the next vblank possibly). Otherwise we risk underruns. In order to achieve that result we must merge the LP1+ watermarks a bit differently since we must also merge levels that are to be disabled. We must also make sure we don't overflow the fields in the watermark registers in case the calculated watermarks come out too big to fit. As early as possible we mark all computed watermark levels as disabled if they would exceed the register maximums. We make sure to leave the actual watermarks for such levels zeroed out. Then during merging, we take the maximum values for every level, regardless if they're disabled or not. That may seem a bit pointless since at the moment all the watermark levels we merge should have their values zeroed if the level is already disabled. However soon we will be dealing with intermediate watermarks that, in addition to the new watermark values, also contain the previous watermark values, and so levels that are disabled may no longer be zeroed out. 
v2: Split the patch in two (Paulo) Use if() instead of when merging -enable (Paulo) Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 37 - 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c722acb..b89fc33 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2242,6 +2242,8 @@ static void ilk_merge_wm_level(struct drm_device *dev, { const struct intel_crtc *intel_crtc; + ret_wm-enable = true; + list_for_each_entry(intel_crtc, dev-mode_config.crtc_list, base.head) { const struct intel_pipe_wm *active = intel_crtc-wm.active; const struct intel_wm_level *wm = active-wm[level]; @@ -2249,16 +2251,19 @@ static void ilk_merge_wm_level(struct drm_device *dev, if (!active-pipe_enabled) continue; + /* +* The watermark values may have been used in the past, +* so we must maintain them in the registers for some +* time even if the level is now disabled. 
+*/ if (!wm-enable) - return; + ret_wm-enable = false; ret_wm-pri_val = max(ret_wm-pri_val, wm-pri_val); ret_wm-spr_val = max(ret_wm-spr_val, wm-spr_val); ret_wm-cur_val = max(ret_wm-cur_val, wm-cur_val); ret_wm-fbc_val = max(ret_wm-fbc_val, wm-fbc_val); } - - ret_wm-enable = true; } /* @@ -2270,6 +2275,7 @@ static void ilk_wm_merge(struct drm_device *dev, struct intel_pipe_wm *merged) { int level, max_level = ilk_wm_max_level(dev); + int last_enabled_level = max_level; /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ if ((INTEL_INFO(dev)-gen = 6 || IS_IVYBRIDGE(dev)) @@ -2285,15 +2291,19 @@ static void ilk_wm_merge(struct drm_device *dev, ilk_merge_wm_level(dev, level, wm); - if (!ilk_validate_wm_level(level, max, wm)) - break; + if (level last_enabled_level) + wm-enable = false; + else if (!ilk_validate_wm_level(level, max, wm)) + /* make sure all following levels get disabled */ + last_enabled_level = level - 1; /* * The spec says it is preferred to disable * FBC WMs instead of disabling a WM level. */ if (wm-fbc_val max-fbc) { - merged-fbc_wm_enabled = false; + if (wm-enable) + merged-fbc_wm_enabled = false; wm-fbc_val = 0; } } @@ -2348,14 +2358,19 @@ static void ilk_compute_wm_results(struct drm_device *dev, level = ilk_wm_lp_to_level(wm_lp, merged); r = merged-wm[level]; - if (!r-enable) - break; - results-wm_lp[wm_lp - 1] = WM3_LP_EN | + /* +* Maintain the watermark values even if the level is +* disabled. Doing otherwise could cause underruns. +*/ + results-wm_lp[wm_lp - 1] = (ilk_wm_lp_latency(dev, level) WM1_LP_LATENCY_SHIFT) | (r-pri_val WM1_LP_SR_SHIFT) | r-cur_val; + if (r-enable) + results-wm_lp[wm_lp - 1] |= WM1_LP_SR_EN; + if (INTEL_INFO(dev)-gen = 8)
[Intel-gfx] [PATCH v2 09/24] drm/i915: Keep vblank interrupts enabled while enabling/disabling planes
From: Ville Syrjälä ville.syrj...@linux.intel.com Because of the upcoming vblank interrupt driven watermark update mechanism we will have use for vblank interrupts during plane enabling/disabling. So don't call drm_vblank_off() until planes are off, and call drm_vblank_on() just before we start to enable the planes. v2: Pimp commit message (Paulo) Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 88df4ea..8d2a31e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3547,6 +3547,8 @@ static void ilk_crtc_enable_planes(struct drm_crtc *crtc) int pipe = intel_crtc-pipe; int plane = intel_crtc-plane; + drm_vblank_on(dev, pipe); + intel_enable_primary_plane(dev_priv, plane, pipe); intel_enable_planes(crtc); intel_crtc_update_cursor(crtc, true); @@ -3557,8 +3559,6 @@ static void ilk_crtc_enable_planes(struct drm_crtc *crtc) mutex_lock(dev-struct_mutex); intel_update_fbc(dev); mutex_unlock(dev-struct_mutex); - - drm_vblank_on(dev, pipe); } static void ilk_crtc_disable_planes(struct drm_crtc *crtc) @@ -3570,7 +3570,6 @@ static void ilk_crtc_disable_planes(struct drm_crtc *crtc) int plane = intel_crtc-plane; intel_crtc_wait_for_pending_flips(crtc); - drm_vblank_off(dev, pipe); if (dev_priv-fbc.plane == plane) intel_disable_fbc(dev); @@ -3581,6 +3580,8 @@ static void ilk_crtc_disable_planes(struct drm_crtc *crtc) intel_disable_planes(crtc); intel_disable_primary_plane(dev_priv, plane, pipe); intel_wait_for_vblank(dev, pipe); + + drm_vblank_off(dev, pipe); } static void ironlake_crtc_enable(struct drm_crtc *crtc) -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PULL] drm-intel-next
Hi Dave, drm-intel-next-2014-04-16: - vlv infoframe fixes from Jesse - dsi/mipi fixes from Shobhit - gen8 pageflip fixes for LRI/SRM from Damien - cmd parser fixes from Brad Volkin - some prep patches for CHV, DRRS, ... - and tons of little things all over drm-intel-next-2014-04-04: - cmd parser for gen7 but only in enforcing and not yet granting mode - the batch copying stuff is still missing. Also performance is a bit ... rough (Brad Volkin + OACONTROL fix from Ken). - deprecate UMS harder (i.e. CONFIG_BROKEN) - interrupt rework from Paulo Zanoni - runtime PM support for bdw and snb, again from Paulo - a pile of refactorings from various people all over the place to prep for new stuff (irq reworks, power domain polish, ...) drm-intel-next-2014-04-04: - cmd parser for gen7 but only in enforcing and not yet granting mode - the batch copying stuff is still missing. Also performance is a bit ... rough (Brad Volkin + OACONTROL fix from Ken). - deprecate UMS harder (i.e. CONFIG_BROKEN) - interrupt rework from Paulo Zanoni - runtime PM support for bdw and snb, again from Paulo - a pile of refactorings from various people all over the place to prep for new stuff (irq reworks, power domain polish, ...) As discussed on irc this contains a (not yet fully tuned and also not yet in granting mode) cmd parser for gen7. Performance is still a bit rough, but not quite as bad as originally feared (Ken later on discovered that he also changed something in his glamour setup which made things worse). If it doesn't get better (and ofc if we don't get all the missing bits in for granting mode) I'll disable it before 3.16 again. But I want to give this beast as much testing as possible for now to avoid ugly regressions once we switch it on. Also please don't use the autogenerate merge commit since that'll miss the stuff from the 1st drm-intel-next tag. If I read the merges in -nightly correctly there's a bit a conflict in i915_gem_context.c. 
I can provide an example merge if you want (or otherwise just peak at linux-next or drm-intel-nightly). Cheers, Daniel The following changes since commit c39b06951f1dc2e384650288676c5b7dcc0ec92c: DRM: armada: fix corruption while loading cursors (2014-04-08 10:51:03 +1000) are available in the git repository at: git://anongit.freedesktop.org/drm-intel tags/drm-intel-next-2014-04-16 for you to fetch changes up to c79057922ed6c2c6df1214e6ab4414fea1b23db2: drm/i915: Remove vblank wait from haswell_write_eld (2014-04-16 18:52:47 +0200) - vlv infoframe fixes from Jesse - dsi/mipi fixes from Shobhit - gen8 pageflip fixes for LRI/SRM from Damien - cmd parser fixes from Brad Volkin - some prep patches for CHV, DRRS, ... - and tons of little things all over Akash Goel (2): drm/i915: Enabling the TLB invalidate bit in GFX Mode register drm/i915/vlv:Implement the WA 'WaDisable_RenderCache_OperationalFlush' Ben Widawsky (10): drm/i915: Split out GTT specific header file drm/i915: Allow full PPGTT with param override drm/i915/bdw: Set initial rps freq to RP1 drm/i915/bdw: Extract rp_state_caps logic drm/i915/bdw: RPS frequency bits are the same as HSW drm/i915/bdw: Expand FADD to 64bit drm/i915: Invariably invalidate before ctx switch drm/i915: Unref context on failed eb_create drm/i915: Dump the whole context object. 
drm/i915/bdw: Add 42ms delay for IPS disable Brad Volkin (15): drm/i915: Initial command parser table definitions drm/i915: Reject privileged commands drm/i915: Allow some privileged commands from master drm/i915: Add register whitelists for mesa drm/i915: Add register whitelist for DRM master drm/i915: Enable register whitelist checks drm/i915: Reject commands that explicitly generate interrupts drm/i915: Enable PPGTT command parser checks drm/i915: Reject commands that would store to global HWS page drm/i915: Add a CMD_PARSER_VERSION getparam drm/i915: Enable command parsing by default drm/i915: BUG_ON() when cmd/reg tables are not sorted drm/i915: Refactor cmd parser checks into a function drm/i915: Track OACONTROL register enable/disable during parsing drm/i915: Add more registers to the whitelist for mesa Chris Wilson (4): drm/i915: Rename GFX_TLB_INVALIDATE_ALWAYS drm/i915: Add PM interrupt details and RPS thresholds to debugfs drm/i915: Move all ring resets before setting the HWS page drm/i915: dma_buf_vunmap is presumed not to fail, don't let it Christoph Jaeger (1): drm/i915: drop __FUNCTION__ as argument to DRM_DEBUG_KMS Damien Lespiau (10): drm/i915: Don't store the max cursor width/height in the crtc drm/i915: Hide vlv_force_wake_{get, put}() in intel_uncore.c drm/i915: Hide the per forcewake-engine
Re: [Intel-gfx] [RESEND][PATCH][linux-next] Revert drm/i915: fix build warning on 32-bit (v2)
On Mon, Apr 28, 2014 at 03:03:23PM +0200, Jan Moskyto Matejka wrote: This reverts commit 60f2b4af1258c05e6b037af866be81abc24438f7. The same warning has been fixed in e5081a538a565284fec5f30a937d98e460d5e780 and these two commits got merged in 74e99a84de2d0980320612db8015ba606af42114 which caused another warning. Simply, the reverted commit casted the pointer difference to unsigned long and the other commit changed the output type from long to ptrdiff_t. The other commit fixes the original warning the better way so I'm reverting this commit now. Signed-off-by: Jan Moskyto Matejka m...@suse.cz My apologies for missing this the first time around. Queued for -next, thanks for the patch. -Daniel --- drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 4cf6d02..0eaed44 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -405,7 +405,7 @@ int i915_parse_cmds(struct intel_ring_buffer *ring, DRM_DEBUG_DRIVER(CMD: Command length exceeds batch length: 0x%08X length=%d batchlen=%td\n, *cmd, length, - (unsigned long)(batch_end - cmd)); + batch_end - cmd); ret = -EINVAL; break; } -- 1.8.4.5 -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Fix assert_plane warning during FDI link train
On Fri, Apr 25, 2014 at 10:12:07PM +0300, ville.syrj...@linux.intel.com wrote: From: Ville Syrjälä ville.syrj...@linux.intel.com assert_plane_enabled() is now triggering during FDI link train because we no longer enable planes that early. This problem got introduced in: commit a5c4d7bc187bd13bc11ac06bb4ea3a0d4001aa4d Author: Ville Syrjälä ville.syrj...@linux.intel.com Date: Fri Mar 7 18:32:13 2014 +0200 drm/i915: Disable/enable planes as the first/last thing during modeset on ILK+ Just drop the assert since we shouldn't need planes for link training. Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com Queued for -next, thanks for the patch. I've spotted this in a QA report, but afaik they didn't yet file a bug report. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: bdw: fix RC6 enabled status reporting and disable runtime PM
On Mon, Apr 28, 2014 at 12:03:59PM +0300, Imre Deak wrote: On BDW we don't enable RC6 at the moment, but this isn't reflected in the (sanitized) i915.enable_rc6 option. So make enable_rc6 report correctly that RC6 is disabled, which will also effectively disable RPM on BDW (since RPM depends on RC6). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77565 Signed-off-by: Imre Deak imre.d...@intel.com Officially we've merged runtime PM, but if it doesn't work we need to temporarily disable. Queued for -next, thanks for the patch. -Daniel --- drivers/gpu/drm/i915/intel_pm.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d49ec02..19020e5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3260,6 +3260,10 @@ static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) if (INTEL_INFO(dev)-gen == 5 !IS_IRONLAKE_M(dev)) return 0; + /* Disable RC6 on Broadwell for now */ + if (IS_BROADWELL(dev)) + return 0; + /* Respect the kernel parameter if it is set */ if (enable_rc6 = 0) { int mask; -- 1.8.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: restore backlight precision when converting from opregion
On Mon, Apr 28, 2014 at 11:19:29AM +0800, Aaron Lu wrote: When we set backlight on behalf of ACPI opregion, we will convert the backlight value in the 0-255 range defined in opregion to the actual hardware level. Commit 22505b82a2 (drm/i915: avoid brightness overflow when doing scale) is meant to fix the overflow problem when doing the conversion, but it also caused a problem that the converted hardware level doesn't quite represent the intended value: say user wants maximum backlight level(255 in opregion's range), then we will calculate the actual hardware level to be: level = freq / max * level, where freq is the hardware's max backlight level(937 on an user's box), and max and level are all 255. The converted value should be 937 but the above calculation will yield 765. To fix this issue, just use 64 bits to do the calculation to keep the precision and avoid overflow at the same time. Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=72491 Reported-and-tested-by: Nico Schottelius nico-bugzilla.kernel@schottelius.org Signed-off-by: Aaron Lu aaron...@intel.com --- drivers/gpu/drm/i915/intel_panel.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index a953b081ee38..bdd2f24b7a6b 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -502,10 +502,7 @@ void intel_panel_set_backlight(struct intel_connector *connector, u32 level, /* scale to hardware max, but be careful to not overflow */ freq = panel-backlight.max; - if (freq max) - level = level * freq / max; - else - level = freq / max * level; + level = (u64)level * freq / max; 64bit divisions won't compile on 32bit. You need one of the DO_DIV macros, or whatever they're called again. 
A pain, I know ;-) -Daniel panel-backlight.level = level; if (panel-backlight.device) -- 1.9.0 -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH for stable 3.14 only 1/1] drm/i915: restore QUIRK_NO_PCH_PWM_ENABLE
On Mon, Apr 28, 2014 at 01:10:07PM +0300, Jani Nikula wrote: This reverts the bisected regressing commit bc0bb9fd1c7810407ab810d204bbaecb255fddde Author: Jani Nikula jani.nik...@intel.com Date: Thu Nov 14 12:14:29 2013 +0200 drm/i915: remove QUIRK_NO_PCH_PWM_ENABLE restoring QUIRK_NO_PCH_PWM_ENABLE for a couple of Dell XPS models which broke in 3.14. There is no such revert upstream. We have root caused and fixed the issue upstream, without the quirk, with: commit 39fbc9c8f6765959b55e0b127dd5c57df5a47d67 Author: Jani Nikula jani.nik...@intel.com Date: Wed Apr 9 11:22:06 2014 +0300 drm/i915: check VBT for supported backlight type and commit c675949ec58ca50d5a3ae3c757892f1560f6e896 Author: Jani Nikula jani.nik...@intel.com Date: Wed Apr 9 11:31:37 2014 +0300 drm/i915: do not setup backlight if not available according to VBT While the commits are within the stable rules otherwise, and fix more machines than just the regressed Dell XPS models, we feel backporting them to stable may be too risky. The revert is limited to the broken machines, and the impact should be effectively the same as what the upstream commits do more generally. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76276 Reported-by: Romain Francoise rom...@orebokech.com CC: Kamal Mostafa ka...@canonical.com CC: Daniel Vetter dan...@ffwll.ch CC: sta...@vger.kernel.org (3.14 only) Signed-off-by: Jani Nikula jani.nik...@intel.com Ack from my side since the VBT-based fix we have in 3.15 really is a bit too risky for backporting and should get the full -rc cycle for testing. But we just can't let existing users on affected hw hang in there for 2 months. 
-Daniel --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 16 drivers/gpu/drm/i915/intel_panel.c | 4 3 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index df77e20e3c3d..697f2150a997 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -734,6 +734,7 @@ enum intel_sbi_destination { #define QUIRK_PIPEA_FORCE (10) #define QUIRK_LVDS_SSC_DISABLE (11) #define QUIRK_INVERT_BRIGHTNESS (12) +#define QUIRK_NO_PCH_PWM_ENABLE (13) struct intel_fbdev; struct intel_fbc_work; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9b8a7c7ea7fc..963639d9049b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10771,6 +10771,17 @@ static void quirk_invert_brightness(struct drm_device *dev) DRM_INFO(applying inverted panel brightness quirk\n); } +/* + * Some machines (Dell XPS13) suffer broken backlight controls if + * BLM_PCH_PWM_ENABLE is set. 
+ */ +static void quirk_no_pcm_pwm_enable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + dev_priv-quirks |= QUIRK_NO_PCH_PWM_ENABLE; + DRM_INFO(applying no-PCH_PWM_ENABLE quirk\n); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -10839,6 +10850,11 @@ static struct intel_quirk intel_quirks[] = { /* Acer Aspire 4736Z */ { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, + + /* Dell XPS13 HD Sandy Bridge */ + { 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable }, + /* Dell XPS13 HD and XPS13 FHD Ivy Bridge */ + { 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable }, }; static void intel_init_quirks(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 079ea38f14d9..9f1d7a9300e8 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -671,6 +671,10 @@ static void pch_enable_backlight(struct intel_connector *connector) pch_ctl2 = panel-backlight.max 16; I915_WRITE(BLC_PWM_PCH_CTL2, pch_ctl2); + /* XXX: transitional */ + if (dev_priv-quirks QUIRK_NO_PCH_PWM_ENABLE) + return; + pch_ctl1 = 0; if (panel-backlight.active_low_pwm) pch_ctl1 |= BLM_PCH_POLARITY; -- 1.9.1 -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cheeryview
On Mon, 2014-04-21 at 13:34 +0530, deepa...@linux.intel.com wrote: From: Deepak S deepa...@linux.intel.com v2: Configure PCBR if BIOS fails allocate pcbr (deepak) v3: Fix PCBR condition check during CHV RC6 Enable flag set Signed-off-by: Deepak S deepa...@linux.intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 100 +++- 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b951d61..7090b42 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5134,6 +5134,7 @@ enum punit_power_well { #define GEN6_GT_GFX_RC6 0x138108 #define GEN6_GT_GFX_RC6p 0x13810C #define GEN6_GT_GFX_RC6pp0x138110 +#define VLV_PCBR_ADDR_SHIFT 12 #define GEN6_PCODE_MAILBOX 0x138124 #define GEN6_PCODE_READY (131) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f3c5bce..421a4cc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3264,6 +3264,18 @@ static void gen6_disable_rps(struct drm_device *dev) gen6_disable_rps_interrupts(dev); } +static void cherryview_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + + I915_WRITE(GEN6_RC_CONTROL, 0); + + if (dev_priv-vlv_pctx) { + drm_gem_object_unreference(dev_priv-vlv_pctx-base); + dev_priv-vlv_pctx = NULL; + } +} + static void valleyview_disable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev-dev_private; @@ -3642,6 +3654,28 @@ static void valleyview_check_pctx(struct drm_i915_private *dev_priv) dev_priv-vlv_pctx-stolen-start); } +static void cherryview_setup_pctx(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + unsigned long pctx_paddr; + struct i915_gtt *gtt = dev_priv-gtt; + u32 pcbr; + int pctx_size = 32*1024; + + pcbr = I915_READ(VLV_PCBR); + if ((pcbr VLV_PCBR_ADDR_SHIFT) == 0) { + /* + * From the Gunit register HAS: + * The Gfx 
driver is expected to program this register and ensure + * proper allocation within Gfx stolen memory. For example, this + * register should be programmed such than the PCBR range does not + * overlap with other relevant ranges. + */ + pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - pctx_size); This area should be reserved. + I915_WRITE(VLV_PCBR, pctx_paddr); + } +} + static void valleyview_setup_pctx(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev-dev_private; @@ -3697,6 +3731,61 @@ static void valleyview_cleanup_pctx(struct drm_device *dev) dev_priv-vlv_pctx = NULL; } +static void cherryview_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct intel_ring_buffer *ring; + u32 gtfifodbg, rc6_mode = 0, pcbr; + int i; + + WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock)); + + if ((gtfifodbg = I915_READ(GTFIFODBG))) { + DRM_DEBUG_DRIVER(GT fifo had a previous error %x\n, + gtfifodbg); + I915_WRITE(GTFIFODBG, gtfifodbg); + } + + cherryview_setup_pctx(dev); This should be called from intel_init_gt_powersave(). + + /* 1a 1b: Get forcewake during program sequence. 
Although the driver + * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + /* 2a: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 16); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + + for_each_ring(ring, dev_priv, i) + I915_WRITE(RING_MAX_IDLE(ring-mmio_base), 10); + + I915_WRITE(GEN6_RC6_THRESHOLD, 5); /* 50/125ms per EI */ + + /* allows RC6 residency counter to work */ + I915_WRITE(VLV_COUNTER_CONTROL, +_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + /* Todo: If BIOS has not configured PCBR + * then allocate in BIOS Reserved */ + + /* For now we assume BIOS is allocating and populating the PCBR */ + pcbr = I915_READ(VLV_PCBR); + + DRM_DEBUG_DRIVER(PCBR offset : 0x%x\n, pcbr); + + /* 3: Enable RC6 */ + if ((intel_enable_rc6(dev) INTEL_RC6_ENABLE) +
Re: [Intel-gfx] [PATCH 18/71] drm/i915/chv: Add vlv_pipe_to_channel
On Wed, 2014-04-09 at 13:28 +0300, ville.syrj...@linux.intel.com wrote: From: Chon Ming Lee chon.ming@intel.com Cherryview has 3 pipes. Some of the pll dpio offset calculation is based on pipe number. Need to use vlv_pipe_to_channel to calculate the correct phy channel to use for the pipe. Signed-off-by: Chon Ming Lee chon.ming@intel.com Reviewed-by: Imre Deak imre.d...@intel.com --- drivers/gpu/drm/i915/intel_drv.h | 14 ++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 087e471..e572799 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -544,6 +544,20 @@ vlv_dport_to_channel(struct intel_digital_port *dport) } } +static inline int +vlv_pipe_to_channel(enum pipe pipe) +{ + switch (pipe) { + case PIPE_A: + case PIPE_C: + return DPIO_CH0; + case PIPE_B: + return DPIO_CH1; + default: + BUG(); + } +} + static inline struct drm_crtc * intel_get_crtc_for_pipe(struct drm_device *dev, int pipe) { signature.asc Description: This is a digitally signed message part ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 43/49] drm/i915/bdw: Handle context switch events
tmp = I915_READ(GEN8_GT_IIR(0)); if (tmp) { ret = IRQ_HANDLED; + rcs = tmp GEN8_RCS_IRQ_SHIFT; - bcs = tmp GEN8_BCS_IRQ_SHIFT; + ring = dev_priv-ring[RCS]; if (rcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[RCS]); + notify_ring(dev, ring); + if (rcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) + gen8_handle_context_events(ring); Handling the context events here can generate a new execlist submission, which if a small enough workload, can finish and generate a new context event interrupt before we ack this interrupt. When we ack this interrupt, we clear the new one too, losing an interrupt. Moving the I915_WRITE(GEN8_GT_IIR(0), tmp); to just inside the if (tmp) { conditional (or anywhere before this call) fixes this issue. There is no harm in acking the interrupt immediately as we have the read stored in tmp. -Original Message- From: Daniel, Thomas Sent: Monday, April 28, 2014 10:58 AM To: Beckett, Robert; Mateo Lozano, Oscar; Barbalho, Rafael; Ewins, Jon Subject: RE: Re: [Intel-gfx] [PATCH 43/49] drm/i915/bdw: Handle context switch events Hi Bob, Looks like a good catch, and a sensible fix. Thomas. I agree with Thomas. Will add to the next revision of the series. Thanks! Oscar ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cheeryview
On Mon, Apr 28, 2014 at 05:29:46PM +0300, Imre Deak wrote: +static void cherryview_setup_pctx(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + unsigned long pctx_paddr; + struct i915_gtt *gtt = dev_priv-gtt; + u32 pcbr; + int pctx_size = 32*1024; + + pcbr = I915_READ(VLV_PCBR); + if ((pcbr VLV_PCBR_ADDR_SHIFT) == 0) { + /* +* From the Gunit register HAS: +* The Gfx driver is expected to program this register and ensure +* proper allocation within Gfx stolen memory. For example, this +* register should be programmed such than the PCBR range does not +* overlap with other relevant ranges. +*/ + pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - pctx_size); This area should be reserved. We've had a really lengthy discussion internally about the bios-reserved chunk in stolen. It was stalled due to (imo unjustified) fear to leak information what the bios actually uses this for. If we need to reserve more of stolen than we currently do we need to pick up that approach again instead of adding more bandaids. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3] drm/i915: Add boot paramter to control rps boost at boot time.
From: Deepak S deepa...@linux.intel.com We are adding a module parameter to control rps boost. By default, we enable the boost for better performance. Based on the need (perf/power) we can either enable/disable. v2: Addressed rps default comment (Jani) v3: Use bool to represent the boot parameter (Ville). Signed-off-by: Deepak S deepa...@linux.intel.com Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h| 1 + drivers/gpu/drm/i915/i915_gem.c| 2 +- drivers/gpu/drm/i915/i915_params.c | 5 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e81feab..6136aab 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1945,6 +1945,7 @@ struct i915_params { bool reset; bool disable_display; bool disable_vtd_wa; + bool enable_rps_boost; }; extern struct i915_params i915 __read_mostly; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b00a77e..f2b3262 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1049,7 +1049,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, timeout_expire = timeout ? 
jiffies + timespec_to_jiffies_timeout(timeout) : 0; - if (INTEL_INFO(dev)-gen = 6 can_wait_boost(file_priv)) { + if (INTEL_INFO(dev)-gen = 6 can_wait_boost(file_priv) i915.enable_rps_boost) { gen6_rps_boost(dev_priv); if (file_priv) mod_delayed_work(dev_priv-wq, diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index d05a2af..b51da7c 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -48,6 +48,7 @@ struct i915_params i915 __read_mostly = { .disable_display = 0, .enable_cmd_parser = 1, .disable_vtd_wa = 0, + .enable_rps_boost = true, }; module_param_named(modeset, i915.modeset, int, 0400); @@ -156,3 +157,7 @@ MODULE_PARM_DESC(disable_vtd_wa, Disable all VT-d workarounds (default: false) module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600); MODULE_PARM_DESC(enable_cmd_parser, Enable command parsing (1=enabled [default], 0=disabled)); + +module_param_named(enable_rps_boost, i915.enable_rps_boost, bool, 0600); +MODULE_PARM_DESC(enable_rps_boost, +Enable/Disable boost RPS frequency (default: true)); -- 1.8.5.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 66/71] drm/i915/chv: Use RMW to toggle swing calc init
ville.syrj...@linux.intel.com writes: From: Ville Syrjälä ville.syrj...@linux.intel.com The spec only tells us to set individual bits here and there. So we use RMW for most things. Do the same for the swing calc init. Eventually we should optimize things to just blast the final value in with group access whenever possible. But to do that someone needs to take a good look at what's the reset value for each registers, and possibly if the BIOS manages to frob with some of them. For now use RMW access always. Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com Some accesses use define masks, some hardcoded ones. But as they were there to begin with, not a problem of these patches. For future work, I think we could get rid of quite amount of DWXX_CHXX definitions if we would build macros that setup the function,lane/group and broadcast. Patches 65 and 66, Reviewed-by: Mika Kuoppala mika.kuopp...@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 7 +++ drivers/gpu/drm/i915/intel_dp.c | 17 ++--- drivers/gpu/drm/i915/intel_hdmi.c | 18 ++ 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b91232f..7056994 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -698,6 +698,13 @@ enum punit_power_well { #define DPIO_PCS_SWING_CALC_TX1_TX3(131) #define CHV_PCS_DW10(ch) _PORT(ch, _CHV_PCS_DW10_CH0, _CHV_PCS_DW10_CH1) +#define _VLV_PCS01_DW10_CH0 0x0228 +#define _VLV_PCS23_DW10_CH0 0x0428 +#define _VLV_PCS01_DW10_CH1 0x2628 +#define _VLV_PCS23_DW10_CH1 0x2828 +#define VLV_PCS01_DW10(port) _PORT(port, _VLV_PCS01_DW10_CH0, _VLV_PCS01_DW10_CH1) +#define VLV_PCS23_DW10(port) _PORT(port, _VLV_PCS23_DW10_CH0, _VLV_PCS23_DW10_CH1) + #define _VLV_PCS_DW11_CH00x822c #define _VLV_PCS_DW11_CH10x842c #define VLV_PCS_DW11(ch) _PORT(ch, _VLV_PCS_DW11_CH0, _VLV_PCS_DW11_CH1) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 4c54930..9cbd702 
100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2346,7 +2346,13 @@ static uint32_t intel_chv_signal_levels(struct intel_dp *intel_dp) mutex_lock(dev_priv-dpio_lock); /* Clear calc init */ - vlv_dpio_write(dev_priv, pipe, CHV_PCS_DW10(ch), 0); + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); + val = ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3); + vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val); + + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch)); + val = ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3); + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); /* Program swing deemph */ for (i = 0; i 4; i++) { @@ -2397,8 +2403,13 @@ static uint32_t intel_chv_signal_levels(struct intel_dp *intel_dp) } /* Start swing calculation */ - vlv_dpio_write(dev_priv, pipe, CHV_PCS_DW10(ch), - (DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3)); + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); + val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3; + vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val); + + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch)); + val |= DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3; + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); /* LRC Bypass */ val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index e912554..d2b1186 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1283,7 +1283,13 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) /* FIXME: Fix up value only after power analysis */ /* Clear calc init */ - vlv_dpio_write(dev_priv, pipe, CHV_PCS_DW10(ch), 0); + val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); + val = ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3); + vlv_dpio_write(dev_priv, pipe, VLV_PCS01_DW10(ch), val); + + val = 
vlv_dpio_read(dev_priv, pipe, VLV_PCS23_DW10(ch)); + val = ~(DPIO_PCS_SWING_CALC_TX0_TX2 | DPIO_PCS_SWING_CALC_TX1_TX3); + vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); /* FIXME: Program the support xxx V-dB */ /* Use 800mV-0dB */ @@ -1322,9 +1328,13 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) (0x9a DPIO_UNIQ_TRANS_SCALE_SHIFT)); #endif /* Start swing calculation */ - vlv_dpio_write(dev_priv, pipe, CHV_PCS_DW10(ch), - DPIO_PCS_SWING_CALC_TX0_TX2 | -
[Intel-gfx] [PATCH v6] drm/i915/vlv: WA for Turbo and RC6 to work together.
From: Deepak S deepa...@linux.intel.com With RC6 enabled, BYT has an HW issue in determining the right Gfx busyness. WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide on increasing/decreasing the freq. This logic will monitor C0 counters of render/media power-wells over EI period and takes necessary action based on these values v2: Refactor duplicate code. (Ville) v3: Reformat the comments. (Ville) v4: Enable required counters and remove unwanted code (Ville) v5: Added frequency change acceleration support and remove kernel-doc style comments. (Ville) v6: Updated comment section and Fix w/a comment. (Ville) Signed-off-by: Deepak S deepa...@linux.intel.com Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 15 + drivers/gpu/drm/i915/i915_irq.c | 133 +++- drivers/gpu/drm/i915/i915_reg.h | 11 drivers/gpu/drm/i915/intel_pm.c | 12 +++- 4 files changed, 167 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6136aab..5251946 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -819,6 +819,12 @@ struct i915_suspend_saved_registers { u32 savePCH_PORT_HOTPLUG; }; +struct intel_rps_ei_calc { + u32 cz_ts_ei; + u32 render_ei_c0; + u32 media_ei_c0; +}; + struct intel_gen6_power_mgmt { /* work and pm_iir are protected by dev_priv-irq_lock */ struct work_struct work; @@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt { u8 rp1_freq;/* less than RP0 power/freqency */ u8 rp0_freq;/* Non-overclocked max frequency. */ + u32 ei_interrupt_count; + int last_adj; enum { LOW_POWER, BETWEEN, HIGH_POWER } power; @@ -1414,6 +1422,13 @@ struct drm_i915_private { /* gen6+ rps state */ struct intel_gen6_power_mgmt rps; + /* rps wa up ei calculation */ + struct intel_rps_ei_calc rps_up_ei; + + /* rps wa down ei calculation */ + struct intel_rps_ei_calc rps_down_ei; + + /* ilk-only ips/rps state. 
Everything in here is protected by the global * mchdev_lock in intel_pm.c */ struct intel_ilk_power_mgmt ips; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 2446e61..7d2efc8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1116,6 +1116,131 @@ static void notify_ring(struct drm_device *dev, i915_queue_hangcheck(dev); } +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv, + struct intel_rps_ei_calc *rps_ei) +{ + u32 cz_ts, cz_freq_khz; + u32 render_count, media_count; + u32 elapsed_render, elapsed_media, elapsed_time; + u32 residency = 0; + + cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP); + cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv-mem_freq * 1000, 4); + + render_count = I915_READ(VLV_RENDER_C0_COUNT_REG); + media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG); + + if (rps_ei-cz_ts_ei == 0) { + rps_ei-cz_ts_ei = cz_ts; + rps_ei-render_ei_c0 = render_count; + rps_ei-media_ei_c0 = media_count; + + return dev_priv-rps.cur_freq; + } + + elapsed_time = cz_ts - rps_ei-cz_ts_ei; + rps_ei-cz_ts_ei = cz_ts; + + elapsed_render = render_count - rps_ei-render_ei_c0; + rps_ei-render_ei_c0 = render_count; + + elapsed_media = media_count - rps_ei-media_ei_c0; + rps_ei-media_ei_c0 = media_count; + + /* Convert all the counters into common unit of milli sec */ + elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC; + elapsed_render /= cz_freq_khz; + elapsed_media /= cz_freq_khz; + + /* +* Calculate overall C0 residency percentage +* only if elapsed time is non zero +*/ + if (elapsed_time) { + residency = + ((max(elapsed_render, elapsed_media) * 100) + / elapsed_time); + } + + return residency; +} + +/** + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU + * busy-ness calculated from C0 counters of render media power wells + * @dev_priv: DRM device private + * + */ +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) +{ + u32 residency_C0_up = 0, 
residency_C0_down = 0; + u8 new_delay, adj; + + dev_priv-rps.ei_interrupt_count++; + + WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock)); + + + if (dev_priv-rps_up_ei.cz_ts_ei == 0) { + vlv_c0_residency(dev_priv, dev_priv-rps_up_ei); + vlv_c0_residency(dev_priv, dev_priv-rps_down_ei); + return dev_priv-rps.cur_freq; + } + + + /* +* To down throttle, C0 residency should be less than down
[Intel-gfx] [PATCH] tests/gem_exec_params: One more invalid ring test
With the vebox 2 patches the number of internal rings doesn't match the number of exposed rings. So add another subtest with an invalid ring which should be invalid both internally and externally. The bug this will catch is using the ring structure before validation, which the old invalid-ring subtest won't be able to catch due to the internal vebox2 ring. Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch --- tests/gem_exec_params.c | 5 + 1 file changed, 5 insertions(+) diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c index 306039c244e3..769969d3fe56 100644 --- a/tests/gem_exec_params.c +++ b/tests/gem_exec_params.c @@ -117,6 +117,11 @@ igt_main RUN_FAIL(EINVAL); } igt_subtest(invalid-ring) { + execbuf.flags = I915_EXEC_RING_MASK; + RUN_FAIL(EINVAL); + } + + igt_subtest(invalid-ring2) { execbuf.flags = LOCAL_I915_EXEC_VEBOX+1; RUN_FAIL(EINVAL); } -- 1.8.1.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 19/71] drm/i915/chv: Trigger phy common lane reset
On Wed, 2014-04-09 at 13:28 +0300, ville.syrj...@linux.intel.com wrote: From: Chon Ming Lee chon.ming@intel.com During cold boot, the display controller needs to deassert the common lane reset. Only do it once during intel_init_dpio for both PHYx2 and PHYx1. Besides, assert the common lane reset when disable pll. This still to be determined whether need to do it by driver. Signed-off-by: Chon Ming Lee chon.ming@intel.com [vsyrjala: Don't disable DPIO PLL when using DSI] [vsyrjala: Don't call vlv_disable_pll() by accident on CHV] Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/i915_reg.h | 8 + drivers/gpu/drm/i915/intel_display.c | 66 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8aea092..8fcf4ea 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1391,6 +1391,14 @@ enum punit_power_well { /* Additional CHV pll/phy registers */ #define DPIO_PHY_STATUS (VLV_DISPLAY_BASE + 0x6240) #define DPLL_PORTD_READY_MASK (0xf) +#define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100) +#define PHY_COM_LANE_RESET_DEASSERT(phy, val) \ + ((phy == DPIO_PHY0) ? (val | 1) : (val | 2)) +#define PHY_COM_LANE_RESET_ASSERT(phy, val) \ + ((phy == DPIO_PHY0) ? (val ~1) : (val ~2)) +#define DISPLAY_PHY_STATUS (VLV_DISPLAY_BASE + 0x60104) +#define PHY_POWERGOOD(phy) ((phy == DPIO_PHY0) ? (131) : (130)) + /* * The i830 generation, in LVDS mode, defines P1 as the bit number set within * this field (only one bit may be set). diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 153f244..e33667d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1395,17 +1395,36 @@ static void intel_reset_dpio(struct drm_device *dev) DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); - /* - * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx - - * 6. 
De-assert cmn_reset/side_reset. Same as VLV X0. - * a. GUnit 0x2110 bit[0] set to 1 (def 0) - * b. The other bits such as sfr settings / modesel may all be set - * to 0. This is VLV specific, so ok to be moved, - * - * This should only be done on init and resume from S3 with both - * PLLs disabled, or we risk losing DPIO and PLL synchronization. - */ but this is also true for CHV, so should stay. - I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST); + if (IS_CHERRYVIEW(dev)) { + enum dpio_phy phy; + u32 val; + + for (phy = DPIO_PHY0; phy I915_NUM_PHYS_VLV; phy++) { + /* Poll for phypwrgood signal */ + if (wait_for(I915_READ(DISPLAY_PHY_STATUS) + PHY_POWERGOOD(phy), 1)) + DRM_ERROR(Display PHY %d is not power up\n, phy); + + /* Deassert common lane reset for PHY*/ + val = I915_READ(DISPLAY_PHY_CONTROL); + I915_WRITE(DISPLAY_PHY_CONTROL, + PHY_COM_LANE_RESET_DEASSERT(phy, val)); Would be clearer not to hide the 'or' in the macro and let PHY_COM_LANE_RESET_DEASSERT be just the flag itself and do here I915_WRITE(DISPLAY_PHY_CONTROL, val | PHY_COM_LANE_RESET_DEASSERT(phy)); The above issues are minor, so even without fixing them this patch is Reviewed-by: Imre Deak imre.d...@intel.com + } + + } else { + /* + * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx - + * 6. De-assert cmn_reset/side_reset. Same as VLV X0. + * a. GUnit 0x2110 bit[0] set to 1 (def 0) + * b. The other bits such as sfr settings / modesel may all + * be set to 0. + * + * This should only be done on init and resume from S3 with + * both PLLs disabled, or we risk losing DPIO and PLL + * synchronization. 
+ */ + I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST); + } } static void vlv_enable_pll(struct intel_crtc *crtc) @@ -1529,6 +1548,19 @@ static void vlv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) val = DPLL_INTEGRATED_CRI_CLK_VLV | DPLL_REFA_CLK_ENABLE_VLV; I915_WRITE(DPLL(pipe), val); POSTING_READ(DPLL(pipe)); + +} + +static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) +{ + int dpll = DPLL(pipe); + u32 val; + + /* Set PLL en = 0 */ + val = I915_READ(dpll); + val =
Re: [Intel-gfx] [PATCH v3] drm/i915: Add boot parameter to control rps boost at boot time.
On Mon, Apr 28, 2014 at 08:17:04PM +0530, deepa...@linux.intel.com wrote: From: Deepak S deepa...@linux.intel.com We are adding a module paramter to control rps boost. By default, we enable the boost for better performace. Based on the need (perf/power) we can either enable/disable. v2: Addressed rps default comment (Jani) v3: Use bool to represent the boot parameter (Ville). Signed-off-by: Deepak S deepa...@linux.intel.com Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h| 1 + drivers/gpu/drm/i915/i915_gem.c| 2 +- drivers/gpu/drm/i915/i915_params.c | 5 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e81feab..6136aab 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1945,6 +1945,7 @@ struct i915_params { bool reset; bool disable_display; bool disable_vtd_wa; + bool enable_rps_boost; }; extern struct i915_params i915 __read_mostly; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b00a77e..f2b3262 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1049,7 +1049,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0; - if (INTEL_INFO(dev)-gen = 6 can_wait_boost(file_priv)) { + if (INTEL_INFO(dev)-gen = 6 can_wait_boost(file_priv) i915.enable_rps_boost) { The separate INTEL_INFO was because this used to be a neat dev_priv-info.gen dereference (and dev used to not be derivable from file_priv, which itself may be NULL here), but please don't add another predicate that means can_wait_boost(). -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC] drm/i915: Add variable gem object size support to i915
From: Siluvery, Arun arun.siluv...@intel.com This patch adds support to have gem objects of variable size. The size of the gem object obj-size is always constant and this fact is tightly coupled in the driver; this implementation allows to vary its effective size using an interface similar to fallocate(). A new ioctl() is introduced to mark a range as scratch/usable. Once marked as scratch, associated backing store is released and the region is filled with scratch pages. The region can also be unmarked at a later point in which case new backing pages are created. The range can be anywhere within the object space, it can have multiple ranges possibly overlapping forming a large contiguous range. There is only one single scratch page and Kernel allows to write to this page; userspace need to keep track of scratch page range otherwise any subsequent writes to these pages will overwrite previous content. This feature is useful where the exact size of the object is not clear at the time of its creation, in such case we usually create an object with more than the required size but end up using it partially. In devices where there are tight memory constraints it would be useful to release that additional space which is currently unused. Using this interface the region can be simply marked as scratch which releases its backing store thus reducing the memory pressure on the kernel. Many thanks to Daniel, ChrisW, Tvrtko, Bob for the idea and feedback on this implementation. v2: fix holes in error handling and use consistent data types (Tvrtko) - If page allocation fails simply return error; do not try to invoke shrinker to free backing store. - Release new pages created by us in case of error during page allocation or sg_table update. - Use 64-bit data types for start and length values to avoid truncation. 
Change-Id: Id3339be95dbb6b5c69c39d751986c40ec0ccdaf8 Signed-off-by: Siluvery, Arun arun.siluv...@intel.com --- Please let me know if I need to submit this as PATCH instead of RFC. Since this is RFC I have included all changes as a single patch. drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 205 include/uapi/drm/i915_drm.h | 31 ++ 4 files changed, 239 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 31c499f..3dd4b1a 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2000,6 +2000,7 @@ const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, \ DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_FALLOCATE, i915_gem_fallocate_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_SET_PLANE_180_ROTATION, \ i915_set_plane_180_rotation, DRM_AUTH | DRM_UNLOCKED), DRM_IOCTL_DEF_DRV(I915_ENABLE_PLANE_RESERVED_REG_BIT_2, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4069800..1f30fb6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2210,6 +2210,8 @@ int i915_gem_get_tiling(struct drm_device *dev, void *data, int i915_gem_init_userptr(struct drm_device *dev); int i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int i915_gem_fallocate_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_wait_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6153e01..a0188ee 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -317,6 +317,211 @@ 
i915_gem_create_ioctl(struct drm_device *dev, void *data, args-size, args-handle); } +static int i915_gem_obj_fallocate(struct drm_i915_gem_object *obj, + bool mark_scratch, uint64_t start, + uint64_t length) +{ + int i, j; + int ret; + uint32_t start_page, end_page; + uint32_t page_count; + gfp_t gfp; + bool update_sg_table = false; + unsigned long scratch_pfn; + struct page *scratch; + struct page **pages; + struct sg_table *sg = NULL; + struct sg_page_iter sg_iter; + struct address_space *mapping; + struct drm_i915_private *dev_priv; + + dev_priv = obj-base.dev-dev_private; + start_page = start PAGE_SHIFT; + end_page = (start + length) PAGE_SHIFT; + page_count = obj-base.size
Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cherryview
On Monday 28 April 2014 08:15 PM, Daniel Vetter wrote: On Mon, Apr 28, 2014 at 05:29:46PM +0300, Imre Deak wrote: +static void cherryview_setup_pctx(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + unsigned long pctx_paddr; + struct i915_gtt *gtt = dev_priv-gtt; + u32 pcbr; + int pctx_size = 32*1024; + + pcbr = I915_READ(VLV_PCBR); + if ((pcbr VLV_PCBR_ADDR_SHIFT) == 0) { + /* +* From the Gunit register HAS: +* The Gfx driver is expected to program this register and ensure +* proper allocation within Gfx stolen memory. For example, this +* register should be programmed such than the PCBR range does not +* overlap with other relevant ranges. +*/ + pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - pctx_size); This area should be reserved. We've had a really lengthy discussion internally about the bios-reserved chunk in stolen. It was stalled due to (imo unjustified) fear to leak information what the bios actually uses this for. If we need to reserve more of stolen than we currently do we need to pick up that approach again instead of adding more bandaids. -Daniel Agreed. Will change accordingly. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cherryview
Thanks for the review. I will address the comments On Saturday 26 April 2014 03:12 AM, Ben Widawsky wrote: On Mon, Apr 21, 2014 at 01:34:07PM +0530, deepa...@linux.intel.com wrote: From: Deepak S deepa...@linux.intel.com v2: Configure PCBR if BIOS fails allocate pcbr (deepak) v3: Fix PCBR condition check during CHV RC6 Enable flag set Signed-off-by: Deepak S deepa...@linux.intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 100 +++- 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b951d61..7090b42 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5134,6 +5134,7 @@ enum punit_power_well { #define GEN6_GT_GFX_RC6 0x138108 #define GEN6_GT_GFX_RC6p 0x13810C #define GEN6_GT_GFX_RC6pp 0x138110 +#define VLV_PCBR_ADDR_SHIFT12 #define GEN6_PCODE_MAILBOX 0x138124 #define GEN6_PCODE_READY(131) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f3c5bce..421a4cc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3264,6 +3264,18 @@ static void gen6_disable_rps(struct drm_device *dev) gen6_disable_rps_interrupts(dev); } +static void cherryview_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + + I915_WRITE(GEN6_RC_CONTROL, 0); + + if (dev_priv-vlv_pctx) { + drm_gem_object_unreference(dev_priv-vlv_pctx-base); + dev_priv-vlv_pctx = NULL; + } +} + static void valleyview_disable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev-dev_private; @@ -3642,6 +3654,28 @@ static void valleyview_check_pctx(struct drm_i915_private *dev_priv) dev_priv-vlv_pctx-stolen-start); } +static void cherryview_setup_pctx(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + unsigned long pctx_paddr; + struct i915_gtt *gtt = dev_priv-gtt; + u32 pcbr; + int pctx_size = 32*1024; + + pcbr = 
I915_READ(VLV_PCBR); + if ((pcbr VLV_PCBR_ADDR_SHIFT) == 0) { + /* +* From the Gunit register HAS: +* The Gfx driver is expected to program this register and ensure +* proper allocation within Gfx stolen memory. For example, this +* register should be programmed such than the PCBR range does not +* overlap with other relevant ranges. +*/ + pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - pctx_size); + I915_WRITE(VLV_PCBR, pctx_paddr); + } +} + Is there a reason we did not follow the same idioms as Valleyview? Shouldn't we be building a stolen object like we do there, and then using that? Furthermore, we need to make sure we make the stolen allocator aware for the case where pcbr is not zero, like we do for valleyview. I think the best solution here is to try to combine the valleyview and cherryview logic for this function. Extract out size, and most of the rest looks pretty similar. For enabling, I am fine with it as is though provided it's hidden by preliminary flag. static void valleyview_setup_pctx(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev-dev_private; @@ -3697,6 +3731,61 @@ static void valleyview_cleanup_pctx(struct drm_device *dev) dev_priv-vlv_pctx = NULL; } +static void cherryview_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct intel_ring_buffer *ring; + u32 gtfifodbg, rc6_mode = 0, pcbr; + int i; + + WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock)); + + if ((gtfifodbg = I915_READ(GTFIFODBG))) { + DRM_DEBUG_DRIVER(GT fifo had a previous error %x\n, +gtfifodbg); + I915_WRITE(GTFIFODBG, gtfifodbg); + } + + cherryview_setup_pctx(dev); + + /* 1a 1b: Get forcewake during program sequence. 
Although the driver +* hasn't enabled a state yet where we need forcewake, BIOS may have.*/ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + /* 2a: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 16); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + + for_each_ring(ring, dev_priv, i) + I915_WRITE(RING_MAX_IDLE(ring-mmio_base), 10); + + I915_WRITE(GEN6_RC6_THRESHOLD, 5); /* 50/125ms per EI */ + + /* allows RC6 residency counter to work */ +
Re: [Intel-gfx] [PATCH 03/10] drm/i915/chv: Enable Render Standby (RC6) for Cherryview
Thanks for the review. I will address the comments On Monday 28 April 2014 07:59 PM, Imre Deak wrote: On Mon, 2014-04-21 at 13:34 +0530, deepa...@linux.intel.com wrote: From: Deepak S deepa...@linux.intel.com v2: Configure PCBR if BIOS fails allocate pcbr (deepak) v3: Fix PCBR condition check during CHV RC6 Enable flag set Signed-off-by: Deepak S deepa...@linux.intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 100 +++- 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b951d61..7090b42 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5134,6 +5134,7 @@ enum punit_power_well { #define GEN6_GT_GFX_RC6 0x138108 #define GEN6_GT_GFX_RC6p 0x13810C #define GEN6_GT_GFX_RC6pp 0x138110 +#define VLV_PCBR_ADDR_SHIFT12 #define GEN6_PCODE_MAILBOX 0x138124 #define GEN6_PCODE_READY(131) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f3c5bce..421a4cc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3264,6 +3264,18 @@ static void gen6_disable_rps(struct drm_device *dev) gen6_disable_rps_interrupts(dev); } +static void cherryview_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + + I915_WRITE(GEN6_RC_CONTROL, 0); + + if (dev_priv-vlv_pctx) { + drm_gem_object_unreference(dev_priv-vlv_pctx-base); + dev_priv-vlv_pctx = NULL; + } +} + static void valleyview_disable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev-dev_private; @@ -3642,6 +3654,28 @@ static void valleyview_check_pctx(struct drm_i915_private *dev_priv) dev_priv-vlv_pctx-stolen-start); } +static void cherryview_setup_pctx(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + unsigned long pctx_paddr; + struct i915_gtt *gtt = dev_priv-gtt; + u32 pcbr; + int pctx_size = 32*1024; + + pcbr = 
I915_READ(VLV_PCBR); + if ((pcbr VLV_PCBR_ADDR_SHIFT) == 0) { + /* +* From the Gunit register HAS: +* The Gfx driver is expected to program this register and ensure +* proper allocation within Gfx stolen memory. For example, this +* register should be programmed such than the PCBR range does not +* overlap with other relevant ranges. +*/ + pctx_paddr = (dev_priv-mm.stolen_base + gtt-stolen_size - pctx_size); This area should be reserved. + I915_WRITE(VLV_PCBR, pctx_paddr); + } +} + static void valleyview_setup_pctx(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev-dev_private; @@ -3697,6 +3731,61 @@ static void valleyview_cleanup_pctx(struct drm_device *dev) dev_priv-vlv_pctx = NULL; } +static void cherryview_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev-dev_private; + struct intel_ring_buffer *ring; + u32 gtfifodbg, rc6_mode = 0, pcbr; + int i; + + WARN_ON(!mutex_is_locked(dev_priv-rps.hw_lock)); + + if ((gtfifodbg = I915_READ(GTFIFODBG))) { + DRM_DEBUG_DRIVER(GT fifo had a previous error %x\n, +gtfifodbg); + I915_WRITE(GTFIFODBG, gtfifodbg); + } + + cherryview_setup_pctx(dev); This should be called from intel_init_gt_powersave(). + + /* 1a 1b: Get forcewake during program sequence. 
Although the driver +* hasn't enabled a state yet where we need forcewake, BIOS may have.*/ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + /* 2a: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 16); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + + for_each_ring(ring, dev_priv, i) + I915_WRITE(RING_MAX_IDLE(ring-mmio_base), 10); + + I915_WRITE(GEN6_RC6_THRESHOLD, 5); /* 50/125ms per EI */ + + /* allows RC6 residency counter to work */ + I915_WRITE(VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + /* Todo: If BIOS has not configured PCBR +* then allocate in BIOS Reserved */ + + /* For now we assume BIOS is allocating and populating the PCBR */ + pcbr = I915_READ(VLV_PCBR); + + DRM_DEBUG_DRIVER(PCBR offset :
Re: [Intel-gfx] [PATCH v3] drm/i915: Add boot parameter to control rps boost at boot time.
On Mon, Apr 28, 2014 at 4:47 PM, deepa...@linux.intel.com wrote: From: Deepak S deepa...@linux.intel.com We are adding a module parameter to control rps boost. By default, we enable the boost for better performance. Based on the need (perf/power) we can either enable/disable. v2: Addressed rps default comment (Jani) v3: Use bool to represent the boot parameter (Ville). Signed-off-by: Deepak S deepa...@linux.intel.com Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com I'm still unhappy about this since it feels like cheating in benchmarks and it gives me the impression that you guys frob this at runtime on Android ;-) A few more ideas: 1. light-boost: We add some hysteresis (either time or whether we're still above rpe or something like that) and don't boost if this is the case. I expect that we won't be able to have the full boost benefits without the downside. 2. eco-boost. We try to boost just enough to not miss the next frame. For that the app needs to tell us (with two new execbuf flags) whether it hit or missed the last deadline. Once an app used those flags for the first time we decrease the boost target freq once per HIT_DEADLINE until we get the first MISS_DEADLINE. Then we only try to sporadically test the limit again. TCP flow control theory might be interesting for copying ideas. 3. runtime-boost-control. The workloads with very predictable regular loads seem to be known. We can just add a new execbuf NO_BOOST flag which libva uses on all execbufs but the first one (since we don't want to drop the first frame really). Approach 3 should be the simplest to implement and also the simplest to demonstrate in the open-source libva (since that's always a merge criterion). Aside: If you really use this at runtime then you essentially create a new ABI with this patch. Which means we need open-source userspace for it anyway.
-Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser
From: Brad Volkin bradley.d.vol...@intel.com For clients that submit large batch buffers the command parser has a substantial impact on performance. On my HSW ULT system performance drops as much as ~20% on some tests. Most of the time is spent in the command lookup code. Converting that from the current naive search to a hash table lookup reduces the performance impact by as much as ~10%. The choice of value for I915_CMD_HASH_ORDER allows all commands currently used in the parser tables to hash to their own bucket (except for one collision on the render ring). The tradeoff is that it wastes memory. Because the opcodes for the commands in the tables are not particularly well distributed, reducing the order still leaves many buckets empty. The increased collisions don't seem to have a huge impact on the performance gain, but for now anyhow, the parser trades memory for performance. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 136 drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- 4 files changed, 116 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9bac097..9dca899 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_ring_buffer *ring) +static bool validate_cmds_sorted(struct intel_ring_buffer *ring, +const struct drm_i915_cmd_table *cmd_tables, +int cmd_table_count) { int i; bool ret = true; - if (!ring-cmd_tables || ring-cmd_table_count == 0) + if (!cmd_tables || cmd_table_count == 0) return true; - for (i = 0; i ring-cmd_table_count; i++) { - const struct drm_i915_cmd_table *table = ring-cmd_tables[i]; + for (i = 0; i cmd_table_count; i++) { + const struct 
drm_i915_cmd_table *table = cmd_tables[i]; u32 previous = 0; int j; @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) ring-master_reg_count); } +struct cmd_node { + const struct drm_i915_cmd_descriptor *desc; + struct hlist_node node; +}; + +/* + * Different command ranges have different numbers of bits for the opcode. + * In order to use the opcode bits, and only the opcode bits, for the hash key + * we should use the MI_* command opcode mask (since those commands use the + * fewest bits for the opcode.) + */ +#define CMD_HASH_MASK STD_MI_OPCODE_MASK + +static int init_hash_table(struct intel_ring_buffer *ring, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) +{ + int i, j; + + hash_init(ring-cmd_hash); + + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; + + for (j = 0; j table-count; j++) { + const struct drm_i915_cmd_descriptor *desc = + table-table[j]; + struct cmd_node *desc_node = + kmalloc(sizeof(*desc_node), GFP_KERNEL); + + if (!desc_node) + return -ENOMEM; + + desc_node-desc = desc; + hash_add(ring-cmd_hash, desc_node-node, +desc-cmd.value CMD_HASH_MASK); + } + } + + return 0; +} + +static void fini_hash_table(struct intel_ring_buffer *ring) +{ + struct hlist_node *tmp; + struct cmd_node *desc_node; + int i; + + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) { + hash_del(desc_node-node); + kfree(desc_node); + } +} + /** * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer * @ring: the ringbuffer to initialize @@ -567,18 +623,21 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) */ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) { + const struct drm_i915_cmd_table *cmd_tables; + int cmd_table_count; + if (!IS_GEN7(ring-dev)) return; switch (ring-id) { case RCS: if (IS_HASWELL(ring-dev)) { - ring-cmd_tables = hsw_render_ring_cmds; - ring-cmd_table_count = + cmd_tables = 
hsw_render_ring_cmds; + cmd_table_count = ARRAY_SIZE(hsw_render_ring_cmds); } else { -
Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser
On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote: From: Brad Volkin bradley.d.vol...@intel.com For clients that submit large batch buffers the command parser has a substantial impact on performance. On my HSW ULT system performance drops as much as ~20% on some tests. Most of the time is spent in the command lookup code. Converting that from the current naive search to a hash table lookup reduces the performance impact by as much as ~10%. The choice of value for I915_CMD_HASH_ORDER allows all commands currently used in the parser tables to hash to their own bucket (except for one collision on the render ring). The tradeoff is that it wastes memory. Because the opcodes for the commands in the tables are not particularly well distributed, reducing the order still leaves many buckets empty. The increased collisions don't seem to have a huge impact on the performance gain, but for now anyhow, the parser trades memory for performance. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com Nice. One idea on top which could be worth a shot is a bloomfilter to handle all the non-special cases without a (likely) cache miss in the hashtable. The per-ring bloomfilter would be only loaded once (and if we place it nearby other stuff the cmdparser needs anyway even that is amortized). Also Chris mentioned that blitter loads under X are about the worst case wrt impact of the cmdparser. Benchmarking x11perf might be a useful extreme testcase for optimizing this. I guess Chris will jump in with more ideas? 
Thanks, Daniel --- drivers/gpu/drm/i915/i915_cmd_parser.c | 136 drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- 4 files changed, 116 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9bac097..9dca899 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_ring_buffer *ring) +static bool validate_cmds_sorted(struct intel_ring_buffer *ring, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) { int i; bool ret = true; - if (!ring-cmd_tables || ring-cmd_table_count == 0) + if (!cmd_tables || cmd_table_count == 0) return true; - for (i = 0; i ring-cmd_table_count; i++) { - const struct drm_i915_cmd_table *table = ring-cmd_tables[i]; + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; u32 previous = 0; int j; @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) ring-master_reg_count); } +struct cmd_node { + const struct drm_i915_cmd_descriptor *desc; + struct hlist_node node; +}; + +/* + * Different command ranges have different numbers of bits for the opcode. + * In order to use the opcode bits, and only the opcode bits, for the hash key + * we should use the MI_* command opcode mask (since those commands use the + * fewest bits for the opcode.) 
+ */ +#define CMD_HASH_MASK STD_MI_OPCODE_MASK + +static int init_hash_table(struct intel_ring_buffer *ring, +const struct drm_i915_cmd_table *cmd_tables, +int cmd_table_count) +{ + int i, j; + + hash_init(ring-cmd_hash); + + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; + + for (j = 0; j table-count; j++) { + const struct drm_i915_cmd_descriptor *desc = + table-table[j]; + struct cmd_node *desc_node = + kmalloc(sizeof(*desc_node), GFP_KERNEL); + + if (!desc_node) + return -ENOMEM; + + desc_node-desc = desc; + hash_add(ring-cmd_hash, desc_node-node, + desc-cmd.value CMD_HASH_MASK); + } + } + + return 0; +} + +static void fini_hash_table(struct intel_ring_buffer *ring) +{ + struct hlist_node *tmp; + struct cmd_node *desc_node; + int i; + + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) { + hash_del(desc_node-node); + kfree(desc_node); + } +} + /** * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer * @ring: the ringbuffer to initialize @@ -567,18 +623,21 @@ static bool
Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser
On Mon, Apr 28, 2014 at 08:22:08AM -0700, Volkin, Bradley D wrote: From: Brad Volkin bradley.d.vol...@intel.com For clients that submit large batch buffers the command parser has a substantial impact on performance. On my HSW ULT system performance drops as much as ~20% on some tests. Most of the time is spent in the command lookup code. Converting that from the current naive search to a hash table lookup reduces the performance impact by as much as ~10%. Tvrtko pointed out that what I wrote here is a bit ambiguous. To clarify: Without the patch, perf drops 20% With the patch, perf drops 10% Brad The choice of value for I915_CMD_HASH_ORDER allows all commands currently used in the parser tables to hash to their own bucket (except for one collision on the render ring). The tradeoff is that it wastes memory. Because the opcodes for the commands in the tables are not particularly well distributed, reducing the order still leaves many buckets empty. The increased collisions don't seem to have a huge impact on the performance gain, but for now anyhow, the parser trades memory for performance. 
Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 136 drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- 4 files changed, 116 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9bac097..9dca899 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_ring_buffer *ring) +static bool validate_cmds_sorted(struct intel_ring_buffer *ring, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) { int i; bool ret = true; - if (!ring-cmd_tables || ring-cmd_table_count == 0) + if (!cmd_tables || cmd_table_count == 0) return true; - for (i = 0; i ring-cmd_table_count; i++) { - const struct drm_i915_cmd_table *table = ring-cmd_tables[i]; + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; u32 previous = 0; int j; @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) ring-master_reg_count); } +struct cmd_node { + const struct drm_i915_cmd_descriptor *desc; + struct hlist_node node; +}; + +/* + * Different command ranges have different numbers of bits for the opcode. + * In order to use the opcode bits, and only the opcode bits, for the hash key + * we should use the MI_* command opcode mask (since those commands use the + * fewest bits for the opcode.) 
+ */ +#define CMD_HASH_MASK STD_MI_OPCODE_MASK + +static int init_hash_table(struct intel_ring_buffer *ring, +const struct drm_i915_cmd_table *cmd_tables, +int cmd_table_count) +{ + int i, j; + + hash_init(ring-cmd_hash); + + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; + + for (j = 0; j table-count; j++) { + const struct drm_i915_cmd_descriptor *desc = + table-table[j]; + struct cmd_node *desc_node = + kmalloc(sizeof(*desc_node), GFP_KERNEL); + + if (!desc_node) + return -ENOMEM; + + desc_node-desc = desc; + hash_add(ring-cmd_hash, desc_node-node, + desc-cmd.value CMD_HASH_MASK); + } + } + + return 0; +} + +static void fini_hash_table(struct intel_ring_buffer *ring) +{ + struct hlist_node *tmp; + struct cmd_node *desc_node; + int i; + + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) { + hash_del(desc_node-node); + kfree(desc_node); + } +} + /** * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer * @ring: the ringbuffer to initialize @@ -567,18 +623,21 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) */ void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring) { + const struct drm_i915_cmd_table *cmd_tables; + int cmd_table_count; + if (!IS_GEN7(ring-dev)) return; switch (ring-id) { case RCS: if (IS_HASWELL(ring-dev)) { - ring-cmd_tables = hsw_render_ring_cmds; -
Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser
On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote: From: Brad Volkin bradley.d.vol...@intel.com For clients that submit large batch buffers the command parser has a substantial impact on performance. On my HSW ULT system performance drops as much as ~20% on some tests. Most of the time is spent in the command lookup code. Converting that from the current naive search to a hash table lookup reduces the performance impact by as much as ~10%. The choice of value for I915_CMD_HASH_ORDER allows all commands currently used in the parser tables to hash to their own bucket (except for one collision on the render ring). The tradeoff is that it wastes memory. Because the opcodes for the commands in the tables are not particularly well distributed, reducing the order still leaves many buckets empty. The increased collisions don't seem to have a huge impact on the performance gain, but for now anyhow, the parser trades memory for performance. For the collisions have you looked into pre-munging the key a bit so that we use more bits? A few shifts and xors shouldn't affect perf much really. Also since the tables are mostly empty we could just overflow to the next hashtable entry, but unfortunately that would require a bit of custom insert and lookup code. Finally if we manage to get 0 collisions a WARN_ON would be good for that, to make sure we don't accidentally regress. Anyway just a few more ideas. -Daniel Finally if we manage to get 0 collisions a WARN_ON would be good for that, to make sure we don't accidentally regress. Anyway just a few more ideas. 
-Daniel Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 136 drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- 4 files changed, 116 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9bac097..9dca899 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_ring_buffer *ring) +static bool validate_cmds_sorted(struct intel_ring_buffer *ring, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) { int i; bool ret = true; - if (!ring-cmd_tables || ring-cmd_table_count == 0) + if (!cmd_tables || cmd_table_count == 0) return true; - for (i = 0; i ring-cmd_table_count; i++) { - const struct drm_i915_cmd_table *table = ring-cmd_tables[i]; + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; u32 previous = 0; int j; @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) ring-master_reg_count); } +struct cmd_node { + const struct drm_i915_cmd_descriptor *desc; + struct hlist_node node; +}; + +/* + * Different command ranges have different numbers of bits for the opcode. + * In order to use the opcode bits, and only the opcode bits, for the hash key + * we should use the MI_* command opcode mask (since those commands use the + * fewest bits for the opcode.) 
+ */ +#define CMD_HASH_MASK STD_MI_OPCODE_MASK + +static int init_hash_table(struct intel_ring_buffer *ring, +const struct drm_i915_cmd_table *cmd_tables, +int cmd_table_count) +{ + int i, j; + + hash_init(ring-cmd_hash); + + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; + + for (j = 0; j table-count; j++) { + const struct drm_i915_cmd_descriptor *desc = + table-table[j]; + struct cmd_node *desc_node = + kmalloc(sizeof(*desc_node), GFP_KERNEL); + + if (!desc_node) + return -ENOMEM; + + desc_node-desc = desc; + hash_add(ring-cmd_hash, desc_node-node, + desc-cmd.value CMD_HASH_MASK); + } + } + + return 0; +} + +static void fini_hash_table(struct intel_ring_buffer *ring) +{ + struct hlist_node *tmp; + struct cmd_node *desc_node; + int i; + + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) { + hash_del(desc_node-node); + kfree(desc_node); + } +} + /** * i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer
Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser
On Mon, Apr 28, 2014 at 08:42:56AM -0700, Daniel Vetter wrote: On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote: From: Brad Volkin bradley.d.vol...@intel.com For clients that submit large batch buffers the command parser has a substantial impact on performance. On my HSW ULT system performance drops as much as ~20% on some tests. Most of the time is spent in the command lookup code. Converting that from the current naive search to a hash table lookup reduces the performance impact by as much as ~10%. The choice of value for I915_CMD_HASH_ORDER allows all commands currently used in the parser tables to hash to their own bucket (except for one collision on the render ring). The tradeoff is that it wastes memory. Because the opcodes for the commands in the tables are not particularly well distributed, reducing the order still leaves many buckets empty. The increased collisions don't seem to have a huge impact on the performance gain, but for now anyhow, the parser trades memory for performance. Signed-off-by: Brad Volkin bradley.d.vol...@intel.com Nice. One idea on top which could be worth a shot is a bloomfilter to handle all the non-special cases without a (likely) cache miss in the hashtable. The per-ring bloomfilter would be only loaded once (and if we place it nearby other stuff the cmdparser needs anyway even that is amortized). Good suggestion. Noted. Also Chris mentioned that blitter loads under X are about the worst case wrt impact of the cmdparser. Benchmarking x11perf might be a useful extreme testcase for optimizing this. I guess Chris will jump in with more ideas? Ok, I'll see how x11perf looks with and without this patch as a starting point. 
Brad Thanks, Daniel --- drivers/gpu/drm/i915/i915_cmd_parser.c | 136 drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- 4 files changed, 116 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9bac097..9dca899 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_ring_buffer *ring) +static bool validate_cmds_sorted(struct intel_ring_buffer *ring, +const struct drm_i915_cmd_table *cmd_tables, +int cmd_table_count) { int i; bool ret = true; - if (!ring-cmd_tables || ring-cmd_table_count == 0) + if (!cmd_tables || cmd_table_count == 0) return true; - for (i = 0; i ring-cmd_table_count; i++) { - const struct drm_i915_cmd_table *table = ring-cmd_tables[i]; + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; u32 previous = 0; int j; @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) ring-master_reg_count); } +struct cmd_node { + const struct drm_i915_cmd_descriptor *desc; + struct hlist_node node; +}; + +/* + * Different command ranges have different numbers of bits for the opcode. + * In order to use the opcode bits, and only the opcode bits, for the hash key + * we should use the MI_* command opcode mask (since those commands use the + * fewest bits for the opcode.) 
+ */ +#define CMD_HASH_MASK STD_MI_OPCODE_MASK + +static int init_hash_table(struct intel_ring_buffer *ring, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) +{ + int i, j; + + hash_init(ring-cmd_hash); + + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; + + for (j = 0; j table-count; j++) { + const struct drm_i915_cmd_descriptor *desc = + table-table[j]; + struct cmd_node *desc_node = + kmalloc(sizeof(*desc_node), GFP_KERNEL); + + if (!desc_node) + return -ENOMEM; + + desc_node-desc = desc; + hash_add(ring-cmd_hash, desc_node-node, +desc-cmd.value CMD_HASH_MASK); + } + } + + return 0; +} + +static void fini_hash_table(struct intel_ring_buffer *ring) +{ + struct hlist_node *tmp; + struct cmd_node *desc_node; + int i; + + hash_for_each_safe(ring-cmd_hash, i, tmp, desc_node, node) { + hash_del(desc_node-node); +
Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser
On Mon, Apr 28, 2014 at 08:53:30AM -0700, Daniel Vetter wrote: On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote: From: Brad Volkin bradley.d.vol...@intel.com For clients that submit large batch buffers the command parser has a substantial impact on performance. On my HSW ULT system performance drops as much as ~20% on some tests. Most of the time is spent in the command lookup code. Converting that from the current naive search to a hash table lookup reduces the performance impact by as much as ~10%. The choice of value for I915_CMD_HASH_ORDER allows all commands currently used in the parser tables to hash to their own bucket (except for one collision on the render ring). The tradeoff is that it wastes memory. Because the opcodes for the commands in the tables are not particularly well distributed, reducing the order still leaves many buckets empty. The increased collisions don't seem to have a huge impact on the performance gain, but for now anyhow, the parser trades memory for performance. For the collisions have you looked into pre-munging the key a bit so that we use more bits? A few shifts and xors shouldn't affect perf much really. I looked at this briefly but didn't find a substantial improvement. The basic patch improved things enough that I wanted to just get it out. I can look into this more, but I'd like to think about implementing the batch buffer copy portion next. I don't want to optimize this, make people happy, and then introduce another perf drop from the copy. Better to just take the full hit now and then continue the improvements. Sound reasonable? Brad Also since the tables are mostly empty we could just overflow to the next hashtable entry, but unfortunately that would require a bit of custom insert and lookup code. Finally if we manage to get 0 collisions a WARN_ON would be good for that, to make sure we don't accidentally regress. Anyway just a few more ideas. 
-Daniel Finally if we manage to get 0 collisions a WARN_ON would be good for that, to make sure we don't accidentally regress. Anyway just a few more ideas. -Daniel Signed-off-by: Brad Volkin bradley.d.vol...@intel.com --- drivers/gpu/drm/i915/i915_cmd_parser.c | 136 drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- 4 files changed, 116 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 9bac097..9dca899 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -498,16 +498,18 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } -static bool validate_cmds_sorted(struct intel_ring_buffer *ring) +static bool validate_cmds_sorted(struct intel_ring_buffer *ring, +const struct drm_i915_cmd_table *cmd_tables, +int cmd_table_count) { int i; bool ret = true; - if (!ring-cmd_tables || ring-cmd_table_count == 0) + if (!cmd_tables || cmd_table_count == 0) return true; - for (i = 0; i ring-cmd_table_count; i++) { - const struct drm_i915_cmd_table *table = ring-cmd_tables[i]; + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; u32 previous = 0; int j; @@ -557,6 +559,60 @@ static bool validate_regs_sorted(struct intel_ring_buffer *ring) ring-master_reg_count); } +struct cmd_node { + const struct drm_i915_cmd_descriptor *desc; + struct hlist_node node; +}; + +/* + * Different command ranges have different numbers of bits for the opcode. + * In order to use the opcode bits, and only the opcode bits, for the hash key + * we should use the MI_* command opcode mask (since those commands use the + * fewest bits for the opcode.) 
+ */ +#define CMD_HASH_MASK STD_MI_OPCODE_MASK + +static int init_hash_table(struct intel_ring_buffer *ring, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) +{ + int i, j; + + hash_init(ring-cmd_hash); + + for (i = 0; i cmd_table_count; i++) { + const struct drm_i915_cmd_table *table = cmd_tables[i]; + + for (j = 0; j table-count; j++) { + const struct drm_i915_cmd_descriptor *desc = + table-table[j]; + struct cmd_node *desc_node = + kmalloc(sizeof(*desc_node), GFP_KERNEL); + + if (!desc_node) + return -ENOMEM; + + desc_node-desc = desc; +
Re: [Intel-gfx] [PATCH] drm/i915: Use hash tables for the command parser
On Mon, Apr 28, 2014 at 6:07 PM, Volkin, Bradley D bradley.d.vol...@intel.com wrote: On Mon, Apr 28, 2014 at 08:53:30AM -0700, Daniel Vetter wrote: On Mon, Apr 28, 2014 at 08:22:08AM -0700, bradley.d.vol...@intel.com wrote: From: Brad Volkin bradley.d.vol...@intel.com For clients that submit large batch buffers the command parser has a substantial impact on performance. On my HSW ULT system performance drops as much as ~20% on some tests. Most of the time is spent in the command lookup code. Converting that from the current naive search to a hash table lookup reduces the performance impact by as much as ~10%. The choice of value for I915_CMD_HASH_ORDER allows all commands currently used in the parser tables to hash to their own bucket (except for one collision on the render ring). The tradeoff is that it wastes memory. Because the opcodes for the commands in the tables are not particularly well distributed, reducing the order still leaves many buckets empty. The increased collisions don't seem to have a huge impact on the performance gain, but for now anyhow, the parser trades memory for performance. For the collisions have you looked into pre-munging the key a bit so that we use more bits? A few shifts and xors shouldn't affect perf much really. I looked at this briefly but didn't find a substantial improvement. The basic patch improved things enough that I wanted to just get it out. I can look into this more, but I'd like to think about implementing the batch buffer copy portion next. I don't want to optimize this, make people happy, and then introduce another perf drop from the copy. Better to just take the full hit now and then continue the improvements. Sound reasonable? Yeah, makes sense. Like I've said just throwing around ideas. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 11/14] drm/i915/bdw: Add WT caching ability
On Fri, Apr 18, 2014 at 02:04:27PM -0700, Rodrigo Vivi wrote: From: Ben Widawsky benjamin.widaw...@intel.com I don't have any insight on what parts can do what. The docs do seem to suggest WT caching works in at least the same manner as it doesn't on Haswell. As Ben previously mentioned, s/doesn't/does. Other than that, looks good Reviewed-by: Brad Volkin bradley.d.vol...@intel.com The addr = 0 is to shut up GCC: drivers/gpu/drm/i915/i915_gem_gtt.c:80:7: warning: 'addr' may be used uninitialized in this function [-Wmaybe-uninitialized] Signed-off-by: Ben Widawsky b...@bwidawsk.net Signed-off-by: Rodrigo Vivi rodrigo.v...@gmail.com --- drivers/gpu/drm/i915/i915_drv.h | 11 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 17 + 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4e81ce1..2bc6745 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1835,11 +1835,12 @@ struct drm_i915_cmd_table { #define BSD_RING (1<<VCS) #define BLT_RING (1<<BCS) #define VEBOX_RING (1<<VECS) -#define HAS_BSD(dev) (INTEL_INFO(dev)->ring_mask & BSD_RING) -#define HAS_BLT(dev) (INTEL_INFO(dev)->ring_mask & BLT_RING) -#define HAS_VEBOX(dev) (INTEL_INFO(dev)->ring_mask & VEBOX_RING) -#define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc) -#define HAS_WT(dev) (IS_HASWELL(dev) && to_i915(dev)->ellc_size) +#define HAS_BSD(dev) (INTEL_INFO(dev)->ring_mask & BSD_RING) +#define HAS_BLT(dev) (INTEL_INFO(dev)->ring_mask & BLT_RING) +#define HAS_VEBOX(dev) (INTEL_INFO(dev)->ring_mask & VEBOX_RING) +#define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc) +#define HAS_WT(dev) ((IS_HASWELL(dev) || IS_BROADWELL(dev)) && \ + to_i915(dev)->ellc_size) #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0d514ff..4969162 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ 
b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -68,10 +68,19 @@ static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, { gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0; pte |= addr; - if (level != I915_CACHE_NONE) - pte |= PPAT_CACHED_INDEX; - else + + switch (level) { + case I915_CACHE_NONE: pte |= PPAT_UNCACHED_INDEX; + break; + case I915_CACHE_WT: + pte |= PPAT_DISPLAY_ELLC_INDEX; + break; + default: + pte |= PPAT_CACHED_INDEX; + break; + } + return pte; } @@ -1368,7 +1377,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; int i = 0; struct sg_page_iter sg_iter; - dma_addr_t addr; + dma_addr_t addr = 0; for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { addr = sg_dma_address(sg_iter.sg) + -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 12/14] drm/i915/bdw: enable eDRAM.
Reviewed-by: Brad Volkin bradley.d.vol...@intel.com On Fri, Apr 18, 2014 at 02:04:28PM -0700, Rodrigo Vivi wrote: From: Ben Widawsky benjamin.widaw...@intel.com The same register exists for querying and programming eDRAM AKA eLLC. So we can simply use it. For now, use all the same defaults as we had for Haswell, since like Haswell, I have no further details. I do not actually have a part with eDRAM, so I cannot test this. Signed-off-by: Ben Widawsky b...@bwidawsk.net Signed-off-by: Rodrigo Vivi rodrigo.v...@gmail.com --- drivers/gpu/drm/i915/intel_uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index c8969e3..0e6b502 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -373,7 +373,7 @@ void intel_uncore_early_sanitize(struct drm_device *dev) if (HAS_FPGA_DBG_UNCLAIMED(dev)) __raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM); - if (IS_HASWELL(dev) && + if ((IS_HASWELL(dev) || IS_BROADWELL(dev)) && (__raw_i915_read32(dev_priv, HSW_EDRAM_PRESENT) == 1)) { /* The docs do not explain exactly how the calculation can be * made. It is somewhat guessable, but for now, it's always -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 13/14] drm/i915/bdw: Disable idle DOP clock gating
Reviewed-by: Brad Volkin bradley.d.vol...@intel.com On Fri, Apr 18, 2014 at 02:04:29PM -0700, Rodrigo Vivi wrote: From: Ben Widawsky benjamin.widaw...@intel.com It seems we need this at least for the current platforms we have, but probably not later. In any event, it shouldn't cause too much harm as we do the same thing on several other platforms. Signed-off-by: Ben Widawsky b...@bwidawsk.net Signed-off-by: Rodrigo Vivi rodrigo.v...@gmail.com --- drivers/gpu/drm/i915/intel_pm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a66000c..8d40786 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4924,6 +4924,10 @@ static void gen8_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); + /* WaDisableDopClockGating:bdw May not be needed for production */ + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); -- 1.8.3.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] mm: Throttle shrinkers harder
On 04/26/2014 06:10 AM, Chris Wilson wrote: Thanks for the pointer to register_oom_notifier(), I can use that to make sure that we do purge everything from the GPU, and do a sanity check at the same time, before we start killing processes. Actually, that one doesn't get called until we're *SURE* we are going to OOM. Any action taken in there won't be taken into account. blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0) /* Got some memory back in the last second. */ return; That looks like it should abort the oom and so repeat the allocation attempt? Or is that too hopeful? You're correct. I was reading the code utterly wrong. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled
2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch: On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote: The PC8 state won't be entered unless runtime PM is enabled, so support for PC8 residency counters alone is not enough to run this test. This is true only for the very latest kernels. We have kernels with PC8 support and without runtime PM support. Do you actually need this specific patch to solve any problems you're currently having? If not, maybe we could revert it so people with stable kernels will be able to run IGT. Thanks, Paulo Signed-off-by: Imre Deak imre.d...@intel.com Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch --- tests/pm_pc8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c index 010af44..9a95326 100644 --- a/tests/pm_pc8.c +++ b/tests/pm_pc8.c @@ -769,7 +769,7 @@ static void setup_environment(void) printf("Runtime PM support: %d\n", has_runtime_pm); printf("PC8 residency support: %d\n", has_pc8); - igt_require(has_runtime_pm || has_pc8); + igt_require(has_runtime_pm); } static void teardown_environment(void) -- 1.8.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Paulo Zanoni ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] Revert drm/i915: fix infinite loop at gen6_update_ring_freq
2014-04-23 4:05 GMT-03:00 Daniel Vetter dan...@ffwll.ch: On Tue, Apr 22, 2014 at 06:25:12PM -0300, Paulo Zanoni wrote: 2014-04-11 6:02 GMT-03:00 Daniel Vetter dan...@ffwll.ch: On Thu, Apr 10, 2014 at 10:52:26AM -0700, Ben Widawsky wrote: On Thu, Apr 10, 2014 at 10:50:43AM -0700, Ben Widawsky wrote: On Thu, Apr 10, 2014 at 09:04:47AM +0200, Daniel Vetter wrote: This reverts commit 4b28a1f3ef55a3b0b68dbab1fe6dbaf18e186710. This patch duct-tapes over some issue in the current bdw rps patches which must wait with enabling rc6/rps until the very first batch has been submitted by userspace. But those patches aren't merged yet, and for upstream we need to have an in-kernel emission of the very first batch. I shouldn't have merged this patch so let's revert it again. I said this on the mailing list before you merged the patch. 20140402050338.gb13...@bwidawsk.net 20140402145813.GV7225@phenom.ffwll.local will explain things. There's now a regression report pointing to the revert: https://bugs.freedesktop.org/show_bug.cgi?id=77565 . What is the proposed solution now? Just WARN and still avoid the infinite loop? Or keep the infinite loop and leave the bug open? Disable BDW runtime PM? I've thought that we can only hit this with the as-yet unmerged rc6 patches on bdw, so I'm really confused why this blows up now? In any case I've thought Imre has stumbled over a similar issue on byt and he has a fix to prevent runtime pm until the delayed rps init has run. I've assigned the bug to him. Still confused why this suddenly blew up ... Sorry for the delayed response. The bug is very simple: since we did not enable RC6, by the time we run gen6_update_ring_freq(), the RPS limits will all be zero. The loop decrements a variable until it reaches a point where it is smaller than the other. But since the other variable is zero, the loop won't end since we can't be smaller than zero in the unsigned world, no matter how much we decrement it. 
This can probably be reproduced on non-BDW machines too, with RC6 disabled. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch -- Paulo Zanoni ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled
On Mon, 2014-04-28 at 14:57 -0300, Paulo Zanoni wrote: 2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch: On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote: The PC8 state won't be entered unless runtime PM is enabled, so support for PC8 residency counters alone is not enough to run this test. This is true only for the very latest kernels. We have Kernels with PC8 support and without runtime PM support. Do you actually need this specific patch to solve any problems you're currently having? If not, maybe we could revert it so people with stable Kernels will be able to run IGT. The following one needs this at least on BDW/current kernel: https://bugs.freedesktop.org/show_bug.cgi?id=77565 I might be wrong, but I thought we don't need backward compatibility in igt. We could also make the check kernel version dependent, I'm not sure if it's worth the effort. --Imre Thanks, Paulo Signed-off-by: Imre Deak imre.d...@intel.com Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch --- tests/pm_pc8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c index 010af44..9a95326 100644 --- a/tests/pm_pc8.c +++ b/tests/pm_pc8.c @@ -769,7 +769,7 @@ static void setup_environment(void) printf(Runtime PM support: %d\n, has_runtime_pm); printf(PC8 residency support: %d\n, has_pc8); - igt_require(has_runtime_pm || has_pc8); + igt_require(has_runtime_pm); } static void teardown_environment(void) -- 1.8.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled
2014-04-28 15:22 GMT-03:00 Imre Deak imre.d...@intel.com: On Mon, 2014-04-28 at 14:57 -0300, Paulo Zanoni wrote: 2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch: On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote: The PC8 state won't be entered unless runtime PM is enabled, so support for PC8 residency counters alone is not enough to run this test. This is true only for the very latest kernels. We have Kernels with PC8 support and without runtime PM support. Do you actually need this specific patch to solve any problems you're currently having? If not, maybe we could revert it so people with stable Kernels will be able to run IGT. The following one needs this at least on BDW/current kernel: https://bugs.freedesktop.org/show_bug.cgi?id=77565 This is not a fix to the bug. By skipping the whole test suite, this patch is just hiding the fact that PC8 doesn't work on BDW. Yes, PC8 on BDW (specifically) won't work without runtime PM, but that's not true for HSW on some Kernels. The proper fix to the bug above is to fix RC6 on BDW, or revert the revert. I might be wrong, but I thought we don't need backward compatibility in igt. We could also make the check kernel version dependent, I'm not sure if it's worth the effort. 
--Imre Thanks, Paulo Signed-off-by: Imre Deak imre.d...@intel.com Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch --- tests/pm_pc8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c index 010af44..9a95326 100644 --- a/tests/pm_pc8.c +++ b/tests/pm_pc8.c @@ -769,7 +769,7 @@ static void setup_environment(void) printf(Runtime PM support: %d\n, has_runtime_pm); printf(PC8 residency support: %d\n, has_pc8); - igt_require(has_runtime_pm || has_pc8); + igt_require(has_runtime_pm); } static void teardown_environment(void) -- 1.8.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Paulo Zanoni ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled
On Mon, 2014-04-28 at 15:35 -0300, Paulo Zanoni wrote: 2014-04-28 15:22 GMT-03:00 Imre Deak imre.d...@intel.com: On Mon, 2014-04-28 at 14:57 -0300, Paulo Zanoni wrote: 2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch: On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote: The PC8 state won't be entered unless runtime PM is enabled, so support for PC8 residency counters alone is not enough to run this test. This is true only for the very latest kernels. We have Kernels with PC8 support and without runtime PM support. Do you actually need this specific patch to solve any problems you're currently having? If not, maybe we could revert it so people with stable Kernels will be able to run IGT. The following one needs this at least on BDW/current kernel: https://bugs.freedesktop.org/show_bug.cgi?id=77565 This is not a fix to the bug. By skipping the whole test suite, this patch is just hiding the fact that PC8 doesn't work on BDW. Yes, PC8 on BDW (specifically) won't work without runtime PM, but that's not true for HSW on some Kernels. The proper fix to the bug above is to fix RC6 on BDW, or revert the revert. The igt patch is not supposed to fix the issue, but it is correct for new kernels on all platforms. In case runtime PM is disabled - for example because it didn't get enabled in the first place or because RC6 is force disabled through a module option - the test should be skipped. --Imre I might be wrong, but I thought we don't need backward compatibility in igt. We could also make the check kernel version dependent, I'm not sure if it's worth the effort. 
--Imre Thanks, Paulo Signed-off-by: Imre Deak imre.d...@intel.com Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch --- tests/pm_pc8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c index 010af44..9a95326 100644 --- a/tests/pm_pc8.c +++ b/tests/pm_pc8.c @@ -769,7 +769,7 @@ static void setup_environment(void) printf(Runtime PM support: %d\n, has_runtime_pm); printf(PC8 residency support: %d\n, has_pc8); - igt_require(has_runtime_pm || has_pc8); + igt_require(has_runtime_pm); } static void teardown_environment(void) -- 1.8.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] tests/pm_pc8: skip the test if runtime PM is disabled
On Mon, 2014-04-28 at 15:35 -0300, Paulo Zanoni wrote: 2014-04-28 15:22 GMT-03:00 Imre Deak imre.d...@intel.com: On Mon, 2014-04-28 at 14:57 -0300, Paulo Zanoni wrote: 2014-04-25 5:08 GMT-03:00 Daniel Vetter dan...@ffwll.ch: On Fri, Apr 25, 2014 at 10:29:57AM +0300, Imre Deak wrote: The PC8 state won't be entered unless runtime PM is enabled, so support for PC8 residency counters alone is not enough to run this test. This is true only for the very latest kernels. We have Kernels with PC8 support and without runtime PM support. Do you actually need this specific patch to solve any problems you're currently having? If not, maybe we could revert it so people with stable Kernels will be able to run IGT. The following one needs this at least on BDW/current kernel: https://bugs.freedesktop.org/show_bug.cgi?id=77565 This is not a fix to the bug. By skipping the whole test suite, this patch is just hiding the fact that PC8 doesn't work on BDW. Yes, PC8 on BDW (specifically) won't work without runtime PM, but that's not true for HSW on some Kernels. The proper fix to the bug above is to fix RC6 on BDW, or revert the revert. Note that recently we made RC6 a requirement for runtime PM, so fixing it is the only option for re-enabling runtime PM. I might be wrong, but I thought we don't need backward compatibility in igt. We could also make the check kernel version dependent, I'm not sure if it's worth the effort. 
--Imre Thanks, Paulo Signed-off-by: Imre Deak imre.d...@intel.com Reviewed-by: Daniel Vetter daniel.vet...@ffwll.ch --- tests/pm_pc8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pm_pc8.c b/tests/pm_pc8.c index 010af44..9a95326 100644 --- a/tests/pm_pc8.c +++ b/tests/pm_pc8.c @@ -769,7 +769,7 @@ static void setup_environment(void) printf(Runtime PM support: %d\n, has_runtime_pm); printf(PC8 residency support: %d\n, has_pc8); - igt_require(has_runtime_pm || has_pc8); + igt_require(has_runtime_pm); } static void teardown_environment(void) -- 1.8.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] Revert drm/i915: fix infinite loop at gen6_update_ring_freq
On Mon, Apr 28, 2014 at 8:14 PM, Paulo Zanoni przan...@gmail.com wrote: This can probably be reproduced on non-BDW machines too, with RC6 disabled. If I understand Imre's patch correctly the bug is that we didn't have rc6 on bdw, but the sanitize function didn't make this clear leading to bugs. If my understanding is wrong then I need to drop Imre's patch again. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] Revert drm/i915: fix infinite loop at gen6_update_ring_freq
On Mon, 2014-04-28 at 21:23 +0200, Daniel Vetter wrote: On Mon, Apr 28, 2014 at 8:14 PM, Paulo Zanoni przan...@gmail.com wrote: This can probably be reproduced on non-BDW machines too, with RC6 disabled. If I understand Imre's patch correctly the bug is that we didn't have rc6 on bdw, but the sanitize function didn't make this clear leading to bugs. Yes, that's correct. For runtime PM we require RC6 to be enabled, and we use intel_enable_rc6() to check for this. Before patch [1] intel_enable_rc6() reported incorrectly on BDW that RC6 is enabled. --Imre [1] http://lists.freedesktop.org/archives/intel-gfx/2014-April/044354.html If my understanding is wrong then I need to drop Imre's patch again. -Daniel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2] drm/i915: Debugfs disable RPS boost and idle
RP frequency request is affected by 2 modules: normal turbo algorithm and RPS boost algorithm. By adding RPS boost algorithm to the mix, the final frequency becomes relatively unpredictable. Add a switch to enable/disable RPS boost functionality. When disabled, RP frequency will follow the normal turbo algorithm only. Intention: when boost and idle are disabled, we have a clear view of the turbo algorithm. It's very helpful for verifying that the turbo algorithm is working as expected. Without debugfs hooks, the RPS boost or idle may kick in at any time and under any circumstances. V1-V2: Follow Daniel's comment to explain the intention. Signed-off-by: Daisy Sun daisy@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 40 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 8 ++-- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1e83ae4..ff71214 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3486,6 +3486,45 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops, i915_drop_caches_get, i915_drop_caches_set, 0x%08llx\n); +static int i915_rps_disable_boost_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + + if (INTEL_INFO(dev)-gen 6) + return -ENODEV; + + *val = dev_priv-rps.debugfs_disable_boost; + + return 0; +} + +static int i915_rps_disable_boost_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + int ret; + + flush_delayed_work(dev_priv-rps.delayed_resume_work); + + DRM_DEBUG_DRIVER(Setting RPS disable Boost-Idle mode to %s\n, +val ? 
on : off); + + ret = mutex_lock_interruptible(dev_priv-rps.hw_lock); + if (ret) + return ret; + + dev_priv-rps.debugfs_disable_boost = val; + + mutex_unlock(dev_priv-rps.hw_lock); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_rps_disable_boost_fops, + i915_rps_disable_boost_get, i915_rps_disable_boost_set, + %llu\n); + static int i915_max_freq_get(void *data, u64 *val) { @@ -3821,6 +3860,7 @@ static const struct i915_debugfs_files { {i915_wedged, i915_wedged_fops}, {i915_max_freq, i915_max_freq_fops}, {i915_min_freq, i915_min_freq_fops}, + {i915_rps_disable_boost, i915_rps_disable_boost_fops}, {i915_cache_sharing, i915_cache_sharing_fops}, {i915_ring_stop, i915_ring_stop_fops}, {i915_ring_missed_irq, i915_ring_missed_irq_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 272aa7a..9c427da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -847,6 +847,7 @@ struct intel_gen6_power_mgmt { int last_adj; enum { LOW_POWER, BETWEEN, HIGH_POWER } power; + bool debugfs_disable_boost; bool enabled; struct delayed_work delayed_resume_work; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 75c1c76..6acac14 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3163,7 +3163,9 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) struct drm_device *dev = dev_priv-dev; mutex_lock(dev_priv-rps.hw_lock); - if (dev_priv-rps.enabled) { + + if (dev_priv-rps.enabled +!dev_priv-rps.debugfs_disable_boost) { if (IS_VALLEYVIEW(dev)) vlv_set_rps_idle(dev_priv); else @@ -3178,7 +3180,9 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv) struct drm_device *dev = dev_priv-dev; mutex_lock(dev_priv-rps.hw_lock); - if (dev_priv-rps.enabled) { + + if (dev_priv-rps.enabled +!dev_priv-rps.debugfs_disable_boost) { if (IS_VALLEYVIEW(dev)) valleyview_set_rps(dev_priv-dev, dev_priv-rps.max_freq_softlimit); else -- 1.9.1 ___ Intel-gfx mailing list 
Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 05.1/24] drm/i915: Make sure computed watermarks never overflow the registers
2014-04-28 9:44 GMT-03:00 ville.syrj...@linux.intel.com: From: Ville Syrjälä ville.syrj...@linux.intel.com When we calculate the watermarks for a pipe make sure we leave any level fully zeroed out if it would exceed any of the maximum values that fit in the registers. This will be important later when we start to use also disabled watermark levels during LP1+ merging. Thanks for splitting the patch! It's much easier to review now :) Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 43 ++--- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f061ef1..c722acb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1921,6 +1921,16 @@ static void ilk_compute_wm_maximums(const struct drm_device *dev, max-fbc = ilk_fbc_wm_reg_max(dev); } +static void ilk_compute_wm_reg_maximums(struct drm_device *dev, + int level, + struct ilk_wm_maximums *max) +{ + max-pri = ilk_plane_wm_reg_max(dev, level, false); + max-spr = ilk_plane_wm_reg_max(dev, level, true); + max-cur = ilk_cursor_wm_reg_max(dev, level); + max-fbc = ilk_fbc_wm_reg_max(dev); +} + static bool ilk_validate_wm_level(int level, const struct ilk_wm_maximums *max, struct intel_wm_level *result) @@ -2178,9 +2188,6 @@ static bool intel_compute_pipe_wm(struct drm_crtc *crtc, }; struct ilk_wm_maximums max; - /* LP0 watermarks always use 1/2 DDB partitioning */ - ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, max); - pipe_wm-pipe_enabled = params-active; pipe_wm-sprites_enabled = params-spr.enabled; pipe_wm-sprites_scaled = params-spr.scaled; @@ -2193,15 +2200,37 @@ static bool intel_compute_pipe_wm(struct drm_crtc *crtc, if (params-spr.scaled) max_level = 0; - for (level = 0; level = max_level; level++) - ilk_compute_wm_level(dev_priv, level, params, -pipe_wm-wm[level]); + ilk_compute_wm_level(dev_priv, 0, params, pipe_wm-wm[0]); if (IS_HASWELL(dev) || 
IS_BROADWELL(dev)) pipe_wm-linetime = hsw_compute_linetime_wm(dev, crtc); + /* LP0 watermarks always use 1/2 DDB partitioning */ + ilk_compute_wm_maximums(dev, 0, config, INTEL_DDB_PART_1_2, max); + /* At least LP0 must be valid */ - return ilk_validate_wm_level(0, max, pipe_wm-wm[0]); + if (!ilk_validate_wm_level(0, max, pipe_wm-wm[0])) + return false; The only caller of this function does not really check its return value. OTOH, fixing this is outside of the scope of your patch, I'm just mentioning in case you have some watermarks TODO list :) Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com + + ilk_compute_wm_reg_maximums(dev, 1, max); + + for (level = 1; level = max_level; level++) { + struct intel_wm_level wm = {}; + + ilk_compute_wm_level(dev_priv, level, params, wm); + + /* +* Disable any watermark level that exceeds the +* register maximums since such watermarks are +* always invalid. +*/ + if (!ilk_validate_wm_level(level, max, wm)) + break; + + pipe_wm-wm[level] = wm; + } + + return true; } /* -- 1.8.3.2 -- Paulo Zanoni ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 05.2/24] drm/i915: Merge LP1+ watermarks in safer way
2014-04-28 9:44 GMT-03:00 ville.syrj...@linux.intel.com: From: Ville Syrjälä ville.syrj...@linux.intel.com On ILK when we disable a particular watermark level, we must maintain the actual watermark values for that level for some time (until the next vblank possibly). Otherwise we risk underruns. In order to achieve that result we must merge the LP1+ watermarks a bit differently since we must also merge levels that are to be disabled. We must also make sure we don't overflow the fields in the watermark registers in case the calculated watermarks come out too big to fit. As early as possible we mark all computed watermark levels as disabled if they would exceed the register maximums. We make sure to leave the actual watermarks for such levels zeroed out. The during _Then_ during merging, I guess. Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com merging, we take the maximum values for every level, regardless of whether they're disabled or not. That may seem a bit pointless since at the moment all the watermark levels we merge should have their values zeroed if the level is already disabled. However soon we will be dealing with intermediate watermarks that, in addition to the new watermark values, also contain the previous watermark values, and so levels that are disabled may no longer be zeroed out. 
v2: Split the patch in two (Paulo) Use if() instead of when merging -enable (Paulo) Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 37 - 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c722acb..b89fc33 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2242,6 +2242,8 @@ static void ilk_merge_wm_level(struct drm_device *dev, { const struct intel_crtc *intel_crtc; + ret_wm-enable = true; + list_for_each_entry(intel_crtc, dev-mode_config.crtc_list, base.head) { const struct intel_pipe_wm *active = intel_crtc-wm.active; const struct intel_wm_level *wm = active-wm[level]; @@ -2249,16 +2251,19 @@ static void ilk_merge_wm_level(struct drm_device *dev, if (!active-pipe_enabled) continue; + /* +* The watermark values may have been used in the past, +* so we must maintain them in the registers for some +* time even if the level is now disabled. 
+*/ if (!wm-enable) - return; + ret_wm-enable = false; ret_wm-pri_val = max(ret_wm-pri_val, wm-pri_val); ret_wm-spr_val = max(ret_wm-spr_val, wm-spr_val); ret_wm-cur_val = max(ret_wm-cur_val, wm-cur_val); ret_wm-fbc_val = max(ret_wm-fbc_val, wm-fbc_val); } - - ret_wm-enable = true; } /* @@ -2270,6 +2275,7 @@ static void ilk_wm_merge(struct drm_device *dev, struct intel_pipe_wm *merged) { int level, max_level = ilk_wm_max_level(dev); + int last_enabled_level = max_level; /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ if ((INTEL_INFO(dev)-gen = 6 || IS_IVYBRIDGE(dev)) @@ -2285,15 +2291,19 @@ static void ilk_wm_merge(struct drm_device *dev, ilk_merge_wm_level(dev, level, wm); - if (!ilk_validate_wm_level(level, max, wm)) - break; + if (level last_enabled_level) + wm-enable = false; + else if (!ilk_validate_wm_level(level, max, wm)) + /* make sure all following levels get disabled */ + last_enabled_level = level - 1; /* * The spec says it is preferred to disable * FBC WMs instead of disabling a WM level. */ if (wm-fbc_val max-fbc) { - merged-fbc_wm_enabled = false; + if (wm-enable) + merged-fbc_wm_enabled = false; wm-fbc_val = 0; } } @@ -2348,14 +2358,19 @@ static void ilk_compute_wm_results(struct drm_device *dev, level = ilk_wm_lp_to_level(wm_lp, merged); r = merged-wm[level]; - if (!r-enable) - break; - results-wm_lp[wm_lp - 1] = WM3_LP_EN | + /* +* Maintain the watermark values even if the level is +* disabled. Doing otherwise could cause underruns. +*/ + results-wm_lp[wm_lp - 1] = (ilk_wm_lp_latency(dev, level) WM1_LP_LATENCY_SHIFT) |
Re: [Intel-gfx] [PATCH v2 07/24] drm/i915: Remove useless checks from primary enable/disable
2014-04-28 9:53 GMT-03:00 ville.syrj...@linux.intel.com: From: Ville Syrjälä ville.syrj...@linux.intel.com We won't be calling intel_enable_primary_plane() or intel_disable_primary_plane() with the primary plane in the wrong state. So remove the useless DISPLAY_PLANE_ENABLE checks. v2: Convert the checks to WARNs instead (Daniel,Paulo) Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7938556..af9e3fe 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1896,8 +1896,7 @@ static void intel_enable_primary_plane(struct drm_i915_private *dev_priv, reg = DSPCNTR(plane); val = I915_READ(reg); - if (val DISPLAY_PLANE_ENABLE) - return; + WARN_ON(val DISPLAY_PLANE_ENABLE); I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE); intel_flush_primary_plane(dev_priv, plane); @@ -1926,8 +1925,7 @@ static void intel_disable_primary_plane(struct drm_i915_private *dev_priv, reg = DSPCNTR(plane); val = I915_READ(reg); - if ((val DISPLAY_PLANE_ENABLE) == 0) - return; + WARN_ON((val DISPLAY_PLANE_ENABLE) == 0); I915_WRITE(reg, val ~DISPLAY_PLANE_ENABLE); intel_flush_primary_plane(dev_priv, plane); -- 1.8.3.2 -- Paulo Zanoni ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 09/24] drm/i915: Keep vblank interrupts enabled while enabling/disabling planes
2014-04-28 9:58 GMT-03:00 ville.syrj...@linux.intel.com: From: Ville Syrjälä ville.syrj...@linux.intel.com Becasue of the upcoming vblank interrupt driven watermark update BecaUSe. Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com mechanism we will have use for vblank interrupts during plane enabling/disabling. So don't call drm_vblank_off() until planes are off, and call drm_vblank_on() just before we start to enable the planes. v2: Pimp commit message (Paulo) Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 88df4ea..8d2a31e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3547,6 +3547,8 @@ static void ilk_crtc_enable_planes(struct drm_crtc *crtc) int pipe = intel_crtc-pipe; int plane = intel_crtc-plane; + drm_vblank_on(dev, pipe); + intel_enable_primary_plane(dev_priv, plane, pipe); intel_enable_planes(crtc); intel_crtc_update_cursor(crtc, true); @@ -3557,8 +3559,6 @@ static void ilk_crtc_enable_planes(struct drm_crtc *crtc) mutex_lock(dev-struct_mutex); intel_update_fbc(dev); mutex_unlock(dev-struct_mutex); - - drm_vblank_on(dev, pipe); } static void ilk_crtc_disable_planes(struct drm_crtc *crtc) @@ -3570,7 +3570,6 @@ static void ilk_crtc_disable_planes(struct drm_crtc *crtc) int plane = intel_crtc-plane; intel_crtc_wait_for_pending_flips(crtc); - drm_vblank_off(dev, pipe); if (dev_priv-fbc.plane == plane) intel_disable_fbc(dev); @@ -3581,6 +3580,8 @@ static void ilk_crtc_disable_planes(struct drm_crtc *crtc) intel_disable_planes(crtc); intel_disable_primary_plane(dev_priv, plane, pipe); intel_wait_for_vblank(dev, pipe); + + drm_vblank_off(dev, pipe); } static void ironlake_crtc_enable(struct drm_crtc *crtc) -- 1.8.3.2 -- Paulo Zanoni ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2 41/71] drm/i915/chv: Add some workaround notes
2014-04-28 8:31 GMT-03:00 ville.syrj...@linux.intel.com: From: Ville Syrjälä ville.syrj...@linux.intel.com We implement the following workarounds: * WaDisableAsyncFlipPerfMode:chv * WaProgramMiArbOnOffAroundMiSetContext:chv v2: Drop WaDisableSemaphoreAndSyncFlipWait note Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 30b355a..37dc36d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -614,7 +614,7 @@ mi_set_context(struct intel_ring_buffer *ring, if (ret) return ret; - /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw */ + /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_INFO(ring-dev)-gen = 7) intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); else diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index eb3dd26..b025a51 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -599,7 +599,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) * to use MI_WAIT_FOR_EVENT within the CS. It should already be * programmed to '1' on all products. * -* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw +* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv */ if (INTEL_INFO(dev)-gen = 6) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); -- 1.8.3.2 -- Paulo Zanoni ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 42/71] drm/i915/chv: Implement WaDisableSamplerPowerBypass for CHV
2014-04-28 5:23 GMT-03:00 Ville Syrjälä ville.syrj...@linux.intel.com: On Fri, Apr 25, 2014 at 05:55:38PM -0300, Paulo Zanoni wrote: 2014-04-09 7:28 GMT-03:00 ville.syrj...@linux.intel.com: From: Rafael Barbalho rafael.barba...@intel.com Cherryview also needs this WA. At least on the chv_rebase tree, this WA is implemented for BDW but it is not documented as pre-prod only, and its name is not there. We should probably add a comment documenting the name and the fact that it is also pre-prod on BDW. IIRC BDW will need it even on production steppings. Hmmm the register documentation says one thing while the WA lists say others... I'll let you discover which one is correct :) I think I have a patch somewhere that adds the w/a note for BDW, but I guess I didn't post it yet. Signed-off-by: Rafael Barbalho rafael.barba...@intel.com [vsyrjala: Looks like it's for pre-production hw only] Signed-off-by: Ville Syrjälä ville.syrj...@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 468fe37..60f876c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5405,6 +5405,10 @@ static void cherryview_init_clock_gating(struct drm_device *dev) /* WaDisableSDEUnitClockGating:chv */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ + I915_WRITE(HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); I could not find information anywhere if this is the correct implementation. Can you please provide me pointers to the doc you used? The links on Collab seem broken. Just w/a database + bspec are enough for this one. 
Found it :) Reviewed-by: Paulo Zanoni paulo.r.zan...@intel.com Thanks, Paulo } static void g4x_init_clock_gating(struct drm_device *dev) -- 1.8.3.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Paulo Zanoni -- Ville Syrjälä Intel OTC -- Paulo Zanoni ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Support 64b relocations
All the rest of the code to enable this is in my branch. Without my branch, hitting >32b offsets is impossible. The code has always supported 64b, but it's never actually been run or tested. This change doesn't actually fix anything. [1] I am not sure why X won't work yet. I do not get hangs or obvious errors. There are 3 fixes grouped together here. First is to remove the hardcoded 0 for the upper dword of the relocation. The next fix is to use a 64b value for target_offset. The final fix is to not directly apply target_offset to reloc->delta. reloc->delta is part of ABI, and so we cannot change it. As it stands, 32b is enough to represent everything we're interested in representing anyway. The main problem is, we cannot add greater than 32b values to it directly. [1] Almost all of intel-gpu-tools is not yet ready to test 64b relocations. There are a few places that expect 32b values for offsets and these all won't work. Cc: Rafael Barbalho rafael.barba...@intel.com Cc: Chris Wilson ch...@chris-wilson.co.uk Signed-off-by: Ben Widawsky b...@bwidawsk.net --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 23 +-- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0d806fc..6ffecd2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -262,10 +262,12 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) static int relocate_entry_cpu(struct drm_i915_gem_object *obj, - struct drm_i915_gem_relocation_entry *reloc) + struct drm_i915_gem_relocation_entry *reloc, + uint64_t target_offset) { struct drm_device *dev = obj-base.dev; uint32_t page_offset = offset_in_page(reloc-offset); + uint64_t delta = reloc-delta + target_offset; char *vaddr; int ret; @@ -275,7 +277,7 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj, vaddr = kmap_atomic(i915_gem_object_get_page(obj, reloc-offset PAGE_SHIFT)); - *(uint32_t 
*)(vaddr + page_offset) = reloc-delta; + *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); if (INTEL_INFO(dev)-gen = 8) { page_offset = offset_in_page(page_offset + sizeof(uint32_t)); @@ -286,7 +288,7 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj, (reloc-offset + sizeof(uint32_t)) PAGE_SHIFT)); } - *(uint32_t *)(vaddr + page_offset) = 0; + *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta); } kunmap_atomic(vaddr); @@ -296,10 +298,12 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj, static int relocate_entry_gtt(struct drm_i915_gem_object *obj, - struct drm_i915_gem_relocation_entry *reloc) + struct drm_i915_gem_relocation_entry *reloc, + uint64_t target_offset) { struct drm_device *dev = obj-base.dev; struct drm_i915_private *dev_priv = dev-dev_private; + uint64_t delta = reloc-delta + target_offset; uint32_t __iomem *reloc_entry; void __iomem *reloc_page; int ret; @@ -318,7 +322,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, reloc-offset PAGE_MASK); reloc_entry = (uint32_t __iomem *) (reloc_page + offset_in_page(reloc-offset)); - iowrite32(reloc-delta, reloc_entry); + iowrite32(lower_32_bits(delta), reloc_entry); if (INTEL_INFO(dev)-gen = 8) { reloc_entry += 1; @@ -331,7 +335,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, reloc_entry = reloc_page; } - iowrite32(0, reloc_entry); + iowrite32(upper_32_bits(delta), reloc_entry); } io_mapping_unmap_atomic(reloc_page); @@ -348,7 +352,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct drm_gem_object *target_obj; struct drm_i915_gem_object *target_i915_obj; struct i915_vma *target_vma; - uint32_t target_offset; + uint64_t target_offset; int ret; /* we've already hold a reference to all valid objects */ @@ -427,11 +431,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (obj-active in_atomic()) return -EFAULT; - reloc-delta += target_offset; if (use_cpu_reloc(obj)) - ret = relocate_entry_cpu(obj, reloc); + ret = 
relocate_entry_cpu(obj, reloc, target_offset); else - ret = relocate_entry_gtt(obj, reloc); + ret = relocate_entry_gtt(obj, reloc, target_offset); if (ret) return ret; -- 1.9.2 ___ Intel-gfx mailing
Re: [Intel-gfx] [PATCH 2/2] drm/i915: Print captured bo for all VM in error state
On Sat, Jan 25, 2014 at 08:10:06PM +0100, Daniel Vetter wrote: On Fri, Jan 24, 2014 at 12:13:44PM -0800, Ben Widawsky wrote: ping Merged the first patch to topic/ppgtt, but punted on the 2nd - I think with Mika's improvement to the guilty batch detection we should be able to fix this better. Or what's the consensus here? Aside: I didn't spot your r-b buried way at the bottom of your mail, hence why I didn't apply them. -Daniel What happened to this patch or its equivalent? On Fri, Jan 10, 2014 at 08:08:26PM +, Chris Wilson wrote: On Fri, Jan 10, 2014 at 11:59:10AM -0800, Ben Widawsky wrote: I will gladly re-review if you make any of my suggested changes. Hmm, I had already done the capture_vma one since that is required to display the right addresses in the error state. The output is like: vm[0] Active [0]: Pinned [10]: 0020 8192 10 00 0 0 P dirty L3+LLC 00202000 4096 01 01 0 0 P snooped or LLC 00203000 131072 40 40 0 0 P dirty snooped or LLC 00223000 4096 01 01 0 0 P snooped or LLC 00224000 4096 01 01 0 0 P snooped or LLC 00225000 131072 40 40 0 0 P dirty snooped or LLC 00245000 4096 01 01 0 0 P snooped or LLC 00246000 131072 40 40 0 0 P dirty snooped or LLC 00266000 8294400 41 00 0 0 P uncached 083f2000 8192 41 00 0 0 P L3+LLC vm[1] Active [0]: Pinned [0]: vm[2] Active [1]: 4096 3f 00 f000 0 dirty bsd snooped or LLC Pinned [0]: vm[3] Active [1]: 4096 3f 00 f00c 0 dirty bsd snooped or LLC Pinned [0]: vm[4] Active [1]: 4096 3f 00 f010 0 dirty bsd snooped or LLC Pinned [0]: vm[5] Active [1]: 4096 3f 00 f016 0 dirty bsd snooped or LLC Pinned [0]: vm[6] Active [1]: 4096 3f 00 f019 0 dirty bsd snooped or LLC Pinned [0]: vm[7] Active [1]: 4096 3f 00 f01e 0 dirty bsd snooped or LLC Pinned [0]: vm[8] Active [1]: 4096 3f 00 f017 0 dirty bsd snooped or LLC Pinned [0]: vm[9] Active [1]: 4096 3f 00 f015 0 dirty bsd snooped or LLC Pinned [0]: vm[10] Active [1]: 4096 3f 00 f02b 0 dirty bsd snooped or LLC Pinned [0]: vm[11] Active [1]: 4096 3f 00 f039 0 dirty bsd snooped or LLC 
Pinned [0]: vm[12] Active [1]: 4096 3f 00 f03c 0 dirty bsd snooped or LLC Pinned [0]: vm[13] Active [1]: 4096 3f 00 f043 0 dirty bsd snooped or LLC Pinned [0]: vm[14] Active [1]: 4096 3f 00 f04d 0 dirty bsd snooped or LLC Pinned [0]: vm[15] Active [1]: 4096 3f 00 f02f 0 dirty bsd snooped or LLC Pinned [0]: vm[16] Active [1]: 4096 3f 00 f053 0 dirty bsd snooped or LLC Pinned [0]: vm[17] Active [1]: 4096 3f 00 f05e 0 dirty bsd snooped or LLC Pinned [0]: vm[18] Active [1]: 4096 3f 00 f059 0 dirty bsd snooped or LLC Pinned [0]: vm[19] Active [1]: 4096 3f 00 f03e 0 dirty bsd snooped or LLC Pinned [0]: vm[20] Active [1]: 4096 3f 00 f067 0 dirty bsd snooped or LLC Pinned [0]: vm[21] Active [1]: 4096 3f 00 f06a 0 dirty bsd snooped or LLC Pinned [0]: vm[22] Active [1]: 4096 3f 00 f068 0 dirty bsd snooped or LLC Pinned [0]: vm[23] Active [1]: 4096 3f 00 f071 0 dirty bsd snooped or LLC Pinned [0]: vm[24] Active [1]: 4096 3f 00 f074 0 dirty bsd snooped or LLC Pinned [0]: vm[25] Active [1]: 4096 3f 00 f077 0 dirty bsd snooped or LLC Pinned [0]: vm[26] Active [1]: 4096 3f 00 f07d 0 dirty bsd snooped or LLC Pinned [0]: vm[27] Active [1]: 4096 3f 00 f07f 0 dirty bsd snooped or LLC Pinned [0]: vm[28] Active [1]: 4096 3f 00 f082 0 dirty bsd snooped or LLC Pinned [0]: vm[29] Active [1]: 4096 3f 00 f085 0 dirty bsd snooped or LLC Pinned [0]: vm[30] Active [1]: 4096 3f 00 f088 0 dirty bsd snooped or LLC Pinned [0]: vm[31] Active [1]: 4096 3f 00 f05c 0 dirty bsd snooped or LLC Pinned [0]: vm[32] Active [1]: 4096 3f 00 f08e 0 dirty bsd snooped or LLC Pinned [0]: vm[33] Active [1]: 4096 3f 00 f08c 0 dirty bsd snooped or LLC
[Intel-gfx] [PATCH] drm/i915: Expand error state's address width to 64b
Signed-off-by: Ben Widawsky b...@bwidawsk.net --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 16 +--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 539f16db..cdde849 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -366,7 +366,7 @@ struct drm_i915_error_state { struct drm_i915_error_object { int page_count; - u32 gtt_offset; + u64 gtt_offset; u32 *pages[0]; } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; @@ -391,7 +391,7 @@ struct drm_i915_error_state { u32 size; u32 name; u32 rseqno, wseqno; - u32 gtt_offset; + u64 gtt_offset; u32 read_domains; u32 write_domain; s32 fence_reg:I915_MAX_NUM_FENCE_BITS; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 481a7d1..a5cd3b0 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -194,7 +194,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_printf(m, %s [%d]:\n, name, count); while (count--) { - err_printf(m, %08x %8u %02x %02x %x %x, + err_printf(m, %16llx %8u %02x %02x %x %x, err-gtt_offset, err-size, err-read_domains, @@ -401,7 +401,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, (submitted by %s [%d]), error-ring[i].comm, error-ring[i].pid); - err_printf(m, --- gtt_offset = 0x%08x\n, + err_printf(m, --- gtt_offset = 0x%16llx\n, obj-gtt_offset); print_error_obj(m, obj); } @@ -409,7 +409,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, obj = error-ring[i].wa_batchbuffer; if (obj) { err_printf(m, %s (w/a) --- gtt_offset = 0x%08x\n, - dev_priv-ring[i].name, obj-gtt_offset); + dev_priv-ring[i].name, + lower_32_bits(obj-gtt_offset)); print_error_obj(m, obj); } @@ -428,14 +429,14 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if ((obj = error-ring[i].ringbuffer)) 
{ err_printf(m, %s --- ringbuffer = 0x%08x\n, dev_priv-ring[i].name, - obj-gtt_offset); + lower_32_bits(obj-gtt_offset)); print_error_obj(m, obj); } if ((obj = error-ring[i].hws_page)) { err_printf(m, %s --- HW Status = 0x%08x\n, dev_priv-ring[i].name, - obj-gtt_offset); + lower_32_bits(obj-gtt_offset)); offset = 0; for (elt = 0; elt PAGE_SIZE/16; elt += 4) { err_printf(m, [%04x] %08x %08x %08x %08x\n, @@ -451,14 +452,15 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if ((obj = error-ring[i].ctx)) { err_printf(m, %s --- HW Context = 0x%08x\n, dev_priv-ring[i].name, - obj-gtt_offset); + lower_32_bits(obj-gtt_offset)); print_error_obj(m, obj); } } obj = error-semaphore_obj; if (obj) { - err_printf(m, Semaphore page = 0x%08x\n, obj-gtt_offset); + err_printf(m, Semaphore page = 0x%08x\n, + lower_32_bits(obj-gtt_offset)); for (elt = 0; elt PAGE_SIZE/16; elt += 4) { err_printf(m, [%04x] %08x %08x %08x %08x\n, elt * 4, -- 1.9.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] [v2] drm/i915: Expand error state's address width to 64b
v2: 0 pad the new 8B fields or else intel_error_decode has a hard time. Note, regardless we need an igt update. Signed-off-by: Ben Widawsky b...@bwidawsk.net --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 16 +--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 539f16db..cdde849 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -366,7 +366,7 @@ struct drm_i915_error_state { struct drm_i915_error_object { int page_count; - u32 gtt_offset; + u64 gtt_offset; u32 *pages[0]; } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; @@ -391,7 +391,7 @@ struct drm_i915_error_state { u32 size; u32 name; u32 rseqno, wseqno; - u32 gtt_offset; + u64 gtt_offset; u32 read_domains; u32 write_domain; s32 fence_reg:I915_MAX_NUM_FENCE_BITS; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 481a7d1..881ad8f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -194,7 +194,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_printf(m, %s [%d]:\n, name, count); while (count--) { - err_printf(m, %08x %8u %02x %02x %x %x, + err_printf(m, %016llx %8u %02x %02x %x %x, err-gtt_offset, err-size, err-read_domains, @@ -401,7 +401,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, (submitted by %s [%d]), error-ring[i].comm, error-ring[i].pid); - err_printf(m, --- gtt_offset = 0x%08x\n, + err_printf(m, --- gtt_offset = 0x%016llx\n, obj-gtt_offset); print_error_obj(m, obj); } @@ -409,7 +409,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, obj = error-ring[i].wa_batchbuffer; if (obj) { err_printf(m, %s (w/a) --- gtt_offset = 0x%08x\n, - dev_priv-ring[i].name, obj-gtt_offset); + dev_priv-ring[i].name, + lower_32_bits(obj-gtt_offset)); print_error_obj(m, obj); } @@ -428,14 
+429,14 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if ((obj = error-ring[i].ringbuffer)) { err_printf(m, %s --- ringbuffer = 0x%08x\n, dev_priv-ring[i].name, - obj-gtt_offset); + lower_32_bits(obj-gtt_offset)); print_error_obj(m, obj); } if ((obj = error-ring[i].hws_page)) { err_printf(m, %s --- HW Status = 0x%08x\n, dev_priv-ring[i].name, - obj-gtt_offset); + lower_32_bits(obj-gtt_offset)); offset = 0; for (elt = 0; elt PAGE_SIZE/16; elt += 4) { err_printf(m, [%04x] %08x %08x %08x %08x\n, @@ -451,14 +452,15 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if ((obj = error-ring[i].ctx)) { err_printf(m, %s --- HW Context = 0x%08x\n, dev_priv-ring[i].name, - obj-gtt_offset); + lower_32_bits(obj-gtt_offset)); print_error_obj(m, obj); } } obj = error-semaphore_obj; if (obj) { - err_printf(m, Semaphore page = 0x%08x\n, obj-gtt_offset); + err_printf(m, Semaphore page = 0x%08x\n, + lower_32_bits(obj-gtt_offset)); for (elt = 0; elt PAGE_SIZE/16; elt += 4) { err_printf(m, [%04x] %08x %08x %08x %08x\n, elt * 4, -- 1.9.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] intel_error_decode: use 64b gtt_offset
See the relevant kernel patch for the details. I guess this breaks support for older error state, I am not actually sure. Without versioning our error state though, I cannot think of a better way. Suggestions are welcome. Signed-off-by: Ben Widawsky b...@bwidawsk.net --- tools/intel_error_decode.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/intel_error_decode.c b/tools/intel_error_decode.c index 1eeff07..d0028a1 100644 --- a/tools/intel_error_decode.c +++ b/tools/intel_error_decode.c @@ -311,17 +311,17 @@ print_fence(unsigned int devid, uint64_t fence) uint32_t head[MAX_RINGS]; int head_ndx = 0; int num_rings = 0; -static void print_batch(int is_batch, const char *ring_name, uint32_t gtt_offset) +static void print_batch(int is_batch, const char *ring_name, uint64_t gtt_offset) { const char *buffer_type[2] = { ringbuffer, batchbuffer }; if (is_batch || !num_rings) - printf(%s (%s) at 0x%08x\n, buffer_type[is_batch], ring_name, gtt_offset); + printf(%s (%s) at 0x%016lx\n, buffer_type[is_batch], ring_name, gtt_offset); else - printf(%s (%s) at 0x%08x; HEAD points to: 0x%08x\n, buffer_type[is_batch], ring_name, gtt_offset, head[head_ndx++ % num_rings] + gtt_offset); + printf(%s (%s) at 0x%016lx; HEAD points to: 0x%016lx\n, buffer_type[is_batch], ring_name, gtt_offset, head[head_ndx++ % num_rings] + gtt_offset); } static void decode(struct drm_intel_decode *ctx, bool is_batch, - const char *ring_name, uint32_t gtt_offset, uint32_t *data, + const char *ring_name, uint64_t gtt_offset, uint32_t *data, int *count) { if (!*count) @@ -344,7 +344,7 @@ read_data_file(FILE *file) char *line = NULL; size_t line_size; uint32_t offset, value, ring_length = 0; - uint32_t gtt_offset = 0, new_gtt_offset; + uint64_t gtt_offset = 0, new_gtt_offset; char *ring_name = NULL; int is_batch = 1; @@ -361,7 +361,7 @@ read_data_file(FILE *file) if (num_rings == -1) num_rings = head_ndx; - matched = sscanf(dashes, --- gtt_offset = 0x%08x\n, + matched = 
sscanf(dashes, --- gtt_offset = 0x%016lx\n, new_gtt_offset); if (matched == 1) { decode(decode_ctx, is_batch, ring_name, @@ -373,7 +373,7 @@ read_data_file(FILE *file) continue; } - matched = sscanf(dashes, --- ringbuffer = 0x%08x\n, + matched = sscanf(dashes, --- ringbuffer = 0x%08lx\n, new_gtt_offset); if (matched == 1) { decode(decode_ctx, is_batch, ring_name, -- 1.9.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2] drm/i915: Debugfs disable RPS boost and idle
On Mon, Apr 28, 2014 at 01:53:52PM -0700, Daisy Sun wrote: RP frequency request is affected by 2 modules: normal turbo algorithm and RPS boost algorithm. By adding RPS boost algorithm to the mix, the final frequency becomes relatively unpredictable. Add a switch to enable/disable RPS boost functionality. When disabled, RP frequency will follow the normal turbo algorithm only. Intention: when boost and idle are disabled, we have a clear vision of turbo algorithm. It's very helpful to verify if the turbo algorithm is working as expected. Without debugfs hooks, the RPS boost or idle may kick in at any time and under any circumstances. V1-V2: Follow Daniel's comment to explain the intention. Signed-off-by: Daisy Sun daisy@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 40 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 8 ++-- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1e83ae4..ff71214 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3486,6 +3486,45 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops, i915_drop_caches_get, i915_drop_caches_set, 0x%08llx\n); +static int i915_rps_disable_boost_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + + if (INTEL_INFO(dev)-gen 6) + return -ENODEV; + + *val = dev_priv-rps.debugfs_disable_boost; + + return 0; +} + +static int i915_rps_disable_boost_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev-dev_private; + int ret; + + flush_delayed_work(dev_priv-rps.delayed_resume_work); I'm not really sure why you feel it's necessary to flush the wq here. Note that you have no real safety since you cannot acquire the lock, and another event can get queued up after the flush. In other words, whatever you're trying to do probably can fail. 
Also note that without this, a simple atomic_t would suffice for debugfs_disable_boost. + + DRM_DEBUG_DRIVER(Setting RPS disable Boost-Idle mode to %s\n, + val ? on : off); + + ret = mutex_lock_interruptible(dev_priv-rps.hw_lock); + if (ret) + return ret; + + dev_priv-rps.debugfs_disable_boost = val; + + mutex_unlock(dev_priv-rps.hw_lock); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_rps_disable_boost_fops, + i915_rps_disable_boost_get, i915_rps_disable_boost_set, + %llu\n); + static int i915_max_freq_get(void *data, u64 *val) { @@ -3821,6 +3860,7 @@ static const struct i915_debugfs_files { {i915_wedged, i915_wedged_fops}, {i915_max_freq, i915_max_freq_fops}, {i915_min_freq, i915_min_freq_fops}, + {i915_rps_disable_boost, i915_rps_disable_boost_fops}, {i915_cache_sharing, i915_cache_sharing_fops}, {i915_ring_stop, i915_ring_stop_fops}, {i915_ring_missed_irq, i915_ring_missed_irq_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 272aa7a..9c427da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -847,6 +847,7 @@ struct intel_gen6_power_mgmt { int last_adj; enum { LOW_POWER, BETWEEN, HIGH_POWER } power; + bool debugfs_disable_boost; bool enabled; struct delayed_work delayed_resume_work; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 75c1c76..6acac14 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3163,7 +3163,9 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) struct drm_device *dev = dev_priv-dev; mutex_lock(dev_priv-rps.hw_lock); - if (dev_priv-rps.enabled) { + + if (dev_priv-rps.enabled + !dev_priv-rps.debugfs_disable_boost) { if (IS_VALLEYVIEW(dev)) vlv_set_rps_idle(dev_priv); else @@ -3178,7 +3180,9 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv) struct drm_device *dev = dev_priv-dev; mutex_lock(dev_priv-rps.hw_lock); - if (dev_priv-rps.enabled) { + + if (dev_priv-rps.enabled + 
!dev_priv-rps.debugfs_disable_boost) { if (IS_VALLEYVIEW(dev)) valleyview_set_rps(dev_priv-dev, dev_priv-rps.max_freq_softlimit); else -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ben Widawsky, Intel Open Source Technology Center
[Intel-gfx] [PATCH] drm/i915: Support 64b execbuf
Previously, our code only had a 32b offset value for where the batchbuffer starts. With full PPGTT, and 64b canonical GPU address space, that is an insufficient value. The code to expand is pretty straightforward, and only one platform needs to do anything with the extra bits. Signed-off-by: Ben Widawsky b...@bwidawsk.net --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c| 16 drivers/gpu/drm/i915/intel_ringbuffer.h| 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6ffecd2..f5f0b92 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1017,7 +1017,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_hw_context *ctx; struct i915_address_space *vm; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); - u32 exec_start = args-batch_start_offset, exec_len; + u64 exec_start = args-batch_start_offset, exec_len; u32 mask, flags; int ret, mode, i; bool need_relocs; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a42942f..bbe989f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1324,7 +1324,7 @@ gen8_ring_put_irq(struct intel_ring_buffer *ring) static int i965_dispatch_execbuffer(struct intel_ring_buffer *ring, -u32 offset, u32 length, +u64 offset, u32 length, unsigned flags) { int ret; @@ -1347,7 +1347,7 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring, #define I830_BATCH_LIMIT (256*1024) static int i830_dispatch_execbuffer(struct intel_ring_buffer *ring, - u32 offset, u32 len, + u64 offset, u32 len, unsigned flags) { int ret; @@ -1398,7 +1398,7 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring, static int i915_dispatch_execbuffer(struct intel_ring_buffer *ring, -u32 offset, u32 len, +u64 offset, u32 len, unsigned 
flags) { int ret; @@ -1943,7 +1943,7 @@ static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, static int gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, - u32 offset, u32 len, + u64 offset, u32 len, unsigned flags) { struct drm_i915_private *dev_priv = ring-dev-dev_private; @@ -1957,8 +1957,8 @@ gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, /* FIXME(BDW): Address space and security selectors. */ intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt8)); - intel_ring_emit(ring, offset); - intel_ring_emit(ring, 0); + intel_ring_emit(ring, lower_32_bits(offset)); + intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); @@ -1967,7 +1967,7 @@ gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, static int hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, - u32 offset, u32 len, + u64 offset, u32 len, unsigned flags) { int ret; @@ -1988,7 +1988,7 @@ hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, static int gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, - u32 offset, u32 len, + u64 offset, u32 len, unsigned flags) { int ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index dbdce5f..cb55cff 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -135,7 +135,7 @@ struct intel_ring_buffer { void(*set_seqno)(struct intel_ring_buffer *ring, u32 seqno); int (*dispatch_execbuffer)(struct intel_ring_buffer *ring, - u32 offset, u32 length, + u64 offset, u32 length, unsigned flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 -- 1.9.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx