Re: [Intel-gfx] [RFC 3/5] drm/i915: Simplify runtime_pm reference for execlists

2015-07-29 Thread Nick Hoath

On 09/07/2015 12:14, Chris Wilson wrote:

On Thu, Jul 09, 2015 at 11:57:42AM +0100, Nick Hoath wrote:

No longer take a runtime_pm reference for each execlist request.  Only
take a single reference when the execlist queue becomes nonempty and
release it when it becomes empty.


Nak. We already hold the runtime_pm for GPU activity.

So we should eliminate the runtime_pm reference for execlists?

-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915: Stop marking the unaccessible scratch page as UC

2015-07-29 Thread Chris Wilson
Since by design, if not entirely by practice, nothing is allowed to
access the scratch page we use to background fill the VM, then we do not
need to ensure that it is coherent between the CPU and GPU.
set_pages_uc() does a stop_machine() after changing the PAT, and that
significantly impacts upon context creation throughput.

Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2a1152dfc53c..9868a7d59814 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -409,16 +409,12 @@ static struct i915_page_scratch 
*alloc_scratch_page(struct drm_device *dev)
return ERR_PTR(ret);
}
 
-   set_pages_uc(px_page(sp), 1);
-
return sp;
 }
 
 static void free_scratch_page(struct drm_device *dev,
  struct i915_page_scratch *sp)
 {
-   set_pages_wb(px_page(sp), 1);
-
cleanup_px(dev, sp);
kfree(sp);
 }
-- 
2.4.6

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915: Embed the scratch page struct into each VM

2015-07-29 Thread Chris Wilson
As the scratch page is no longer shared between all VMs, and each has
its own, forgo the small allocation and simply embed the scratch page
struct into the i915_address_space.

Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 81 -
 drivers/gpu/drm/i915/i915_gem_gtt.h |  6 +--
 2 files changed, 35 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9868a7d59814..4c89a8746d97 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -394,29 +394,16 @@ static void fill_page_dma_32(struct drm_device *dev, 
struct i915_page_dma *p,
fill_page_dma(dev, p, v);
 }
 
-static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
+static int
+setup_scratch_page(struct drm_device *dev, struct i915_page_dma *scratch)
 {
-   struct i915_page_scratch *sp;
-   int ret;
-
-   sp = kzalloc(sizeof(*sp), GFP_KERNEL);
-   if (sp == NULL)
-   return ERR_PTR(-ENOMEM);
-
-   ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
-   if (ret) {
-   kfree(sp);
-   return ERR_PTR(ret);
-   }
-
-   return sp;
+   return __setup_page_dma(dev, scratch, GFP_DMA32 | __GFP_ZERO);
 }
 
-static void free_scratch_page(struct drm_device *dev,
- struct i915_page_scratch *sp)
+static void cleanup_scratch_page(struct drm_device *dev,
+struct i915_page_dma *scratch)
 {
-   cleanup_px(dev, sp);
-   kfree(sp);
+   cleanup_page_dma(dev, scratch);
 }
 
 static struct i915_page_table *alloc_pt(struct drm_device *dev)
@@ -462,7 +449,7 @@ static void gen8_initialize_pt(struct i915_address_space 
*vm,
 {
gen8_pte_t scratch_pte;
 
-   scratch_pte = gen8_pte_encode(px_dma(vm-scratch_page),
+   scratch_pte = gen8_pte_encode(vm-scratch_page.daddr,
  I915_CACHE_LLC, true);
 
fill_px(vm-dev, pt, scratch_pte);
@@ -473,9 +460,9 @@ static void gen6_initialize_pt(struct i915_address_space 
*vm,
 {
gen6_pte_t scratch_pte;
 
-   WARN_ON(px_dma(vm-scratch_page) == 0);
+   WARN_ON(vm-scratch_page.daddr == 0);
 
-   scratch_pte = vm-pte_encode(px_dma(vm-scratch_page),
+   scratch_pte = vm-pte_encode(vm-scratch_page.daddr,
 I915_CACHE_LLC, true, 0);
 
fill32_px(vm-dev, pt, scratch_pte);
@@ -757,7 +744,7 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
 {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
-   gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm-scratch_page),
+   gen8_pte_t scratch_pte = gen8_pte_encode(vm-scratch_page.daddr,
 I915_CACHE_LLC, use_scratch);
 
if (!USES_FULL_48BIT_PPGTT(vm-dev)) {
@@ -863,21 +850,22 @@ static void gen8_free_page_tables(struct drm_device *dev,
 static int gen8_init_scratch(struct i915_address_space *vm)
 {
struct drm_device *dev = vm-dev;
+   int ret;
 
-   vm-scratch_page = alloc_scratch_page(dev);
-   if (IS_ERR(vm-scratch_page))
-   return PTR_ERR(vm-scratch_page);
+   ret = setup_scratch_page(dev, vm-scratch_page);
+   if (ret)
+   return ret;
 
vm-scratch_pt = alloc_pt(dev);
if (IS_ERR(vm-scratch_pt)) {
-   free_scratch_page(dev, vm-scratch_page);
+   cleanup_scratch_page(dev, vm-scratch_page);
return PTR_ERR(vm-scratch_pt);
}
 
vm-scratch_pd = alloc_pd(dev);
if (IS_ERR(vm-scratch_pd)) {
free_pt(dev, vm-scratch_pt);
-   free_scratch_page(dev, vm-scratch_page);
+   cleanup_scratch_page(dev, vm-scratch_page);
return PTR_ERR(vm-scratch_pd);
}
 
@@ -886,7 +874,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
if (IS_ERR(vm-scratch_pdp)) {
free_pd(dev, vm-scratch_pd);
free_pt(dev, vm-scratch_pt);
-   free_scratch_page(dev, vm-scratch_page);
+   cleanup_scratch_page(dev, vm-scratch_page);
return PTR_ERR(vm-scratch_pdp);
}
}
@@ -907,7 +895,7 @@ static void gen8_free_scratch(struct i915_address_space *vm)
free_pdp(dev, vm-scratch_pdp);
free_pd(dev, vm-scratch_pd);
free_pt(dev, vm-scratch_pt);
-   free_scratch_page(dev, vm-scratch_page);
+   cleanup_scratch_page(dev, vm-scratch_page);
 }
 
 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
@@ -1380,7 +1368,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, 
struct seq_file *m)
struct i915_address_space *vm = ppgtt-base;

Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Gore, Tim
 -Original Message-
 From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
 Sent: Wednesday, July 29, 2015 4:20 PM
 To: Gordon, David S
 Cc: Gore, Tim; Morton, Derek J; intel-gfx@lists.freedesktop.org; Wood,
 Thomas
 Subject: Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase
 min swap required
 
 On Wed, Jul 29, 2015 at 04:14:55PM +0100, Dave Gordon wrote:
  On 29/07/15 14:15, Chris Wilson wrote:
  On Wed, Jul 29, 2015 at 01:10:23PM +0000, Gore, Tim wrote:
  I don’t see how this implies a kernel bug. It seems like a test
  problem (my subtest as it happens). I was unaware of Android systems
  with small swap partitions (or indeed any swap at all). Not sure I
  can understand the logic of such a tiny swap partition but given the
  situation, unless we can accurately characterise the memory usage of
  the test in advance then we have to either skip the test for small
  swap, or try to monitor memory usage in an ongoing way during the test.
  
  If the system has enough resources to run the test (that is enough
  physical to run an individual batch plus enough swap to hold the
  rest), then the test must not oom.
  -Chris
 
  The test is deliberately attempting to use enough memory to force some
  stuff out to swap, while not hitting a total OOM. That can be a very
  narrow window when the swapspace is small; and the test just guesses
  in advance how much will do the trick rather than gradually increasing
  its demands until it detects that stuff is being swapped.
 
  So not a kernel bug, but something of a failure in the
 
 Pardon? Which part of "we have enough physical and virtual to complete the
 test, but an oom is triggered instead" is an incorrect assumption?
 -Chris
 
 --
 Chris Wilson, Intel Open Source Technology Centre

"We have enough physical and virtual to complete the test" - is the
incorrect assumption. We would have enough space if the test were
able to calculate memory usage accurately enough, but the way the
test calculates memory usage is too imprecise; we don’t allow for any
overhead at all. If overhead > (swap/2 - oom threshold) then we're dead.

 Tim

Tim Gore
Intel Corporation (UK) Ltd. - Co. Reg. #1134945 - Pipers Way, Swindon SN3 1RJ

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 14/19] drm/i915: object size needs to be u64

2015-07-29 Thread Michel Thierry
In a 48b world, users can try to allocate buffers bigger than 4GB; in
these cases it is important that size is a 64b variable.

v2: Drop the warning about bind with size 0, it shouldn't happen anyway.
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5d68578..80f5d97 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3348,7 +3348,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
 {
struct drm_device *dev = obj-base.dev;
struct drm_i915_private *dev_priv = dev-dev_private;
-   u32 size, fence_size, fence_alignment, unfenced_alignment;
+   u32 fence_alignment, unfenced_alignment;
+   u64 size, fence_size;
u64 start =
flags  PIN_OFFSET_BIAS ? flags  PIN_OFFSET_MASK : 0;
u64 end =
@@ -3407,7 +3408,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
 * attempt to find space.
 */
if (size  end) {
-   DRM_DEBUG(Attempting to bind an object (view type=%u) larger 
than the aperture: size=%u  %s aperture=%llu\n,
+   DRM_DEBUG(Attempting to bind an object (view type=%u) larger 
than the aperture: size=%llu  %s aperture=%llu\n,
  ggtt_view ? ggtt_view-type : 0,
  size,
  flags  PIN_MAPPABLE ? mappable : total,
-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 17/19] drm/i915: Wa32bitGeneralStateOffset Wa32bitInstructionBaseOffset

2015-07-29 Thread Michel Thierry
There are some allocations that must be only referenced by 32-bit
offsets. To limit the chances of having the first 4GB already full,
objects not requiring this workaround use DRM_MM_SEARCH_BELOW/
DRM_MM_CREATE_TOP flags

Specifically, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State
Offset are limited to 32-bits.

Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if
they can be allocated above the 32-bit address range. To limit the
chances of having the first 4GB already full, objects will use
DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible.

v2: Changed flag logic from needs_32b, to supports_48b.
v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel)
v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK
to use last PIN_ defined instead of hard-coded value; use correct limit
check in eb_vma_misplaced. (Chris)
v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris)
v6: Apply pin-high for ggtt too (Chris)
v7: Handle simultaneous pin-high and pin-mappable end correctly (Akash)
Fix check for entries currently using +4GB addresses, use min_t and
other polish in object_bind_to_vm (Chris)

Cc: Chris Wilson ch...@chris-wilson.co.uk
Cc: Akash Goel akash.g...@intel.com
Reviewed-by: Chris Wilson ch...@chris-wilson.co.uk (v4)
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h|  2 ++
 drivers/gpu/drm/i915/i915_gem.c| 25 +++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +
 include/uapi/drm/i915_drm.h|  3 ++-
 4 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ed2fbcd..c344805 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2775,6 +2775,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_OFFSET_BIAS(13)
 #define PIN_USER   (14)
 #define PIN_UPDATE (15)
+#define PIN_ZONE_4G(16)
+#define PIN_HIGH   (17)
 #define PIN_OFFSET_MASK (~4095)
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 80f5d97..e1ca63f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3349,11 +3349,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
struct drm_device *dev = obj-base.dev;
struct drm_i915_private *dev_priv = dev-dev_private;
u32 fence_alignment, unfenced_alignment;
+   u32 search_flag, alloc_flag;
+   u64 start, end;
u64 size, fence_size;
-   u64 start =
-   flags  PIN_OFFSET_BIAS ? flags  PIN_OFFSET_MASK : 0;
-   u64 end =
-   flags  PIN_MAPPABLE ? dev_priv-gtt.mappable_end : vm-total;
struct i915_vma *vma;
int ret;
 
@@ -3393,6 +3391,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
size = flags  PIN_MAPPABLE ? fence_size : obj-base.size;
}
 
+   start = flags  PIN_OFFSET_BIAS ? flags  PIN_OFFSET_MASK : 0;
+   end = vm-total;
+   if (flags  PIN_MAPPABLE)
+   end = min_t(u64, end, dev_priv-gtt.mappable_end);
+   if (flags  PIN_ZONE_4G)
+   end = min_t(u64, end, (1ULL  32));
+
if (alignment == 0)
alignment = flags  PIN_MAPPABLE ? fence_alignment :
unfenced_alignment;
@@ -3428,13 +3433,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
if (IS_ERR(vma))
goto err_unpin;
 
+   if (flags  PIN_HIGH) {
+   search_flag = DRM_MM_SEARCH_BELOW;
+   alloc_flag = DRM_MM_CREATE_TOP;
+   } else {
+   search_flag = DRM_MM_SEARCH_DEFAULT;
+   alloc_flag = DRM_MM_CREATE_DEFAULT;
+   }
+
 search_free:
ret = drm_mm_insert_node_in_range_generic(vm-mm, vma-node,
  size, alignment,
  obj-cache_level,
  start, end,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
+ search_flag,
+ alloc_flag);
if (ret) {
ret = i915_gem_evict_something(dev, vm, size, alignment,
   obj-cache_level,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 923a3c4..78fc881 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ 

[Intel-gfx] [PATCH v6 08/19] drm/i915/gen8: Add 4 level switching infrastructure and lrc support

2015-07-29 Thread Michel Thierry
In 64b (48bit canonical) PPGTT addressing, the PDP0 register contains
the base address to PML4, while the other PDP registers are ignored.

In LRC, the addressing mode must be specified in every context
descriptor, and the base address to PML4 is stored in the reg state.

v2: PML4 update in legacy context switch is left for historic reasons,
the preferred mode of operation is with lrc context based submission.
v3: s/gen8_map_page_directory/gen8_setup_page_directory and
s/gen8_map_page_directory_pointer/gen8_setup_page_directory_pointer.
Also, clflush will be needed for bxt. (Akash)
v4: Squashed lrc-specific code and use a macro to set PML4 register.
v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
PDP update in bb_start is only for legacy 32b mode.
v6: Rebase after final merged version of Mika's ppgtt/scratch
patches.
v7: There is no need to update the pml4 register value in
execlists_update_context. (Akash)
v8: Move pd and pdp setup functions to a previous patch, they do not
belong here. (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++
 drivers/gpu/drm/i915/i915_reg.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c| 60 ++---
 3 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4179b80..c6c8af7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -656,8 +656,8 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
return 0;
 }
 
-static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct drm_i915_gem_request *req)
+static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
+struct drm_i915_gem_request *req)
 {
int i, ret;
 
@@ -672,6 +672,12 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
return 0;
 }
 
+static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
+ struct drm_i915_gem_request *req)
+{
+   return gen8_write_pdp(req, 0, px_dma(ppgtt-pml4));
+}
+
 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
   struct i915_page_directory_pointer *pdp,
   uint64_t start,
@@ -1318,14 +1324,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt-base.unbind_vma = ppgtt_unbind_vma;
ppgtt-base.bind_vma = ppgtt_bind_vma;
 
-   ppgtt-switch_mm = gen8_mm_switch;
-
if (USES_FULL_48BIT_PPGTT(ppgtt-base.dev)) {
ret = setup_px(ppgtt-base.dev, ppgtt-pml4);
if (ret)
goto free_scratch;
 
ppgtt-base.total = 1ULL  48;
+   ppgtt-switch_mm = gen8_48b_mm_switch;
} else {
ret = __pdp_init(false, ppgtt-pdp);
if (ret)
@@ -1340,6 +1345,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 */
ppgtt-base.total = 
to_i915(ppgtt-base.dev)-gtt.base.total;
 
+   ppgtt-switch_mm = gen8_legacy_mm_switch;
trace_i915_page_directory_pointer_entry_alloc(ppgtt-base,
  0, 0,
  GEN8_PML4E_SHIFT);
@@ -1537,8 +1543,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
int j;
 
for_each_ring(ring, dev_priv, j) {
+   u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? 
GEN8_GFX_PPGTT_48B : 0;
I915_WRITE(RING_MODE_GEN7(ring),
-  _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+  _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3a77678..5bd1b6a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1670,6 +1670,7 @@ enum skl_disp_power_wells {
 #define   GFX_REPLAY_MODE  (111)
 #define   GFX_PSMI_GRANULARITY (110)
 #define   GFX_PPGTT_ENABLE (19)
+#define   GEN8_GFX_PPGTT_48B   (17)
 
 #define VLV_DISPLAY_BASE 0x18
 #define VLV_MIPI_BASE VLV_DISPLAY_BASE
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 99bba8e..0b65188 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -196,13 +196,21 @@
reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
 }
 
+#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
+   reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(ppgtt-pml4)); \
+   reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(ppgtt-pml4)); \
+}
+
 enum {
ADVANCED_CONTEXT = 

[Intel-gfx] [PATCH v6 04/19] drm/i915/gen8: Generalize PTE writing for GEN8 PPGTT

2015-07-29 Thread Michel Thierry
The insert_entries function was the function used to write PTEs. For the
PPGTT it was hardcoded to only understand two level page tables, which
was the case for GEN7. We can reuse this for 4 level page tables, and
remove the concept of insert_entries, which was never viable past 2
level page tables anyway, but it requires a bit of rework to make the
function a bit more generic.

This patch begins the generalization work, and it will be heavily relied
upon when the 48b code is complete. The patch series attempts to make
each function which touches a part of code specific to the page table
level, and this patch is no exception.

v2: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
v3: Rebase after final merged version of Mika's ppgtt/scratch patches.

Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 52 +++--
 1 file changed, 39 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index bd56979..f338a13 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -600,24 +600,21 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
return 0;
 }
 
-static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
-  uint64_t start,
-  uint64_t length,
-  bool use_scratch)
+static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
+  struct i915_page_directory_pointer *pdp,
+  uint64_t start,
+  uint64_t length,
+  gen8_pte_t scratch_pte)
 {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
-   struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
-   gen8_pte_t *pt_vaddr, scratch_pte;
+   gen8_pte_t *pt_vaddr;
unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
unsigned pte = start  GEN8_PTE_SHIFT  GEN8_PTE_MASK;
unsigned num_entries = length  PAGE_SHIFT;
unsigned last_pte, i;
 
-   scratch_pte = gen8_pte_encode(px_dma(ppgtt-base.scratch_page),
- I915_CACHE_LLC, use_scratch);
-
while (num_entries) {
struct i915_page_directory *pd;
struct i915_page_table *pt;
@@ -656,14 +653,30 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
}
 }
 
-static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
- struct sg_table *pages,
- uint64_t start,
- enum i915_cache_level cache_level, u32 
unused)
+static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
+  uint64_t start,
+  uint64_t length,
+  bool use_scratch)
 {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
+
+   gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm-scratch_page),
+I915_CACHE_LLC, use_scratch);
+
+   gen8_ppgtt_clear_pte_range(vm, pdp, start, length, scratch_pte);
+}
+
+static void
+gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ struct sg_table *pages,
+ uint64_t start,
+ enum i915_cache_level cache_level)
+{
+   struct i915_hw_ppgtt *ppgtt =
+   container_of(vm, struct i915_hw_ppgtt, base);
gen8_pte_t *pt_vaddr;
unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
@@ -700,6 +713,19 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
kunmap_px(ppgtt, pt_vaddr);
 }
 
+static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
+ struct sg_table *pages,
+ uint64_t start,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+   struct i915_hw_ppgtt *ppgtt =
+   container_of(vm, struct i915_hw_ppgtt, base);
+   struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
+
+   gen8_ppgtt_insert_pte_entries(vm, pdp, pages, start, cache_level);
+}
+
 static void gen8_free_page_tables(struct drm_device *dev,

[Intel-gfx] [PATCH v6 18/19] drm/i915/gen8: Flip the 48b switch

2015-07-29 Thread Michel Thierry
Use 48b addresses if hw supports it (i915.enable_ppgtt=3).

Note, aliasing PPGTT remains 32b only.

Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 5 ++---
 drivers/gpu/drm/i915/i915_params.c  | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0d7c7c1..a7d3c07 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -108,8 +108,7 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)
 
has_aliasing_ppgtt = INTEL_INFO(dev)-gen = 6;
has_full_ppgtt = INTEL_INFO(dev)-gen = 7;
-   has_full_64bit_ppgtt = (IS_BROADWELL(dev) ||
-   INTEL_INFO(dev)-gen = 9)  false; /* FIXME: 
64b */
+   has_full_64bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)-gen = 9;
 
if (intel_vgpu_active(dev))
has_full_ppgtt = false; /* emulation is too hard */
@@ -147,7 +146,7 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)
}
 
if (INTEL_INFO(dev)-gen = 8  i915.enable_execlists)
-   return 2;
+   return has_full_64bit_ppgtt ? 3 : 2;
else
return has_aliasing_ppgtt ? 1 : 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index 5ae4b0a..d961440 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -111,7 +111,7 @@ MODULE_PARM_DESC(enable_hangcheck,
 module_param_named_unsafe(enable_ppgtt, i915.enable_ppgtt, int, 0400);
 MODULE_PARM_DESC(enable_ppgtt,
Override PPGTT usage. 
-   (-1=auto [default], 0=disabled, 1=aliasing, 2=full));
+   (-1=auto [default], 0=disabled, 1=aliasing, 2=full, 3=full_64b));
 
 module_param_named(enable_execlists, i915.enable_execlists, int, 0400);
 MODULE_PARM_DESC(enable_execlists,
-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 19/19] drm/i915: Save some page table setup on repeated binds

2015-07-29 Thread Michel Thierry
Check if the required page tables are already allocated, if so, we can
skip altogether the inner loop of pdes, and move to the next page
directory.

If the new allocation is different than the existing one (i.e. new
allocation spans more ptes than already covered from earlier allocations),
the used_ptes bitmap may not get updated correctly, but none of the
code-checks rely on this.

Suggested-by: Akash Goel akash.g...@intel.com
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a7d3c07..13cf23c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1249,6 +1249,16 @@ static int gen8_alloc_va_range_3lvl(struct 
i915_address_space *vm,
/* Every pd should be allocated, we just did that above. */
WARN_ON(!pd);
 
+   /* Check if the required page tables are already allocated /
+* mapped; if so, we can skip altogether the inner loop of pdes,
+* and move to the next page directory.
+*/
+   if (bitmap_subset(new_page_tables[pdpe], pd-used_pdes,
+ I915_PDES)) {
+   kunmap_px(ppgtt, page_directory);
+   continue;
+   }
+
gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
/* Same reasoning as pd */
WARN_ON(!pt);
-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 10/19] drm/i915/gen8: Add 4 level support in insert_entries and clear_range

2015-07-29 Thread Michel Thierry
When 48b is enabled, gen8_ppgtt_insert_entries needs to read the Page Map
Level 4 (PML4), before it selects which Page Directory Pointer (PDP)
it will write to.

Similarly, gen8_ppgtt_clear_range needs to get the correct PDP/PD range.

This patch was inspired by Ben's "Depend exclusively on map and
unmap_vma".

v2: Rebase after s/page_tables/page_table/.
v3: Remove unnecessary pdpe loop in gen8_ppgtt_clear_range_4lvl and use
clamp_pdp in gen8_ppgtt_insert_entries (Akash).
v4: Merge gen8_ppgtt_clear_range_4lvl into gen8_ppgtt_clear_range to
maintain symmetry with gen8_ppgtt_insert_entries (Akash).
v5: Do not mix pages and bytes in insert_entries (Akash).
v6: Prevent overflow in sg_nents << PAGE_SHIFT, when inserting 4GB at
once.
v7: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
Use gen8_px_index functions, and remove unnecessary number of pages
parameter in insert_pte_entries.
v8: Change gen8_ppgtt_clear_pte_range to stop at PDP boundary, instead of
adding an extra clamp function; remove unnecessary pdp_start/pdp_len
variables (Akash).
v9: pages->orig_nents instead of sg_nents(pages->sgl) to get the
length (Akash).

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 49 +++--
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7c024e98..7070d42 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -687,9 +687,9 @@ static void gen8_ppgtt_clear_pte_range(struct 
i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen8_pte_t *pt_vaddr;
-   unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
-   unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
-   unsigned pte = start  GEN8_PTE_SHIFT  GEN8_PTE_MASK;
+   unsigned pdpe = gen8_pdpe_index(start);
+   unsigned pde = gen8_pde_index(start);
+   unsigned pte = gen8_pte_index(start);
unsigned num_entries = length  PAGE_SHIFT;
unsigned last_pte, i;
 
@@ -725,7 +725,8 @@ static void gen8_ppgtt_clear_pte_range(struct 
i915_address_space *vm,
 
pte = 0;
if (++pde == I915_PDES) {
-   pdpe++;
+   if (++pdpe == I915_PDPES_PER_PDP(vm-dev))
+   break;
pde = 0;
}
}
@@ -738,12 +739,21 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
 {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
-   struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
-
gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm-scratch_page),
 I915_CACHE_LLC, use_scratch);
 
-   gen8_ppgtt_clear_pte_range(vm, pdp, start, length, scratch_pte);
+   if (!USES_FULL_48BIT_PPGTT(vm-dev)) {
+   gen8_ppgtt_clear_pte_range(vm, ppgtt-pdp, start, length,
+  scratch_pte);
+   } else {
+   uint64_t templ4, pml4e;
+   struct i915_page_directory_pointer *pdp;
+
+   gen8_for_each_pml4e(pdp, ppgtt-pml4, start, length, templ4, 
pml4e) {
+   gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
+  scratch_pte);
+   }
+   }
 }
 
 static void
@@ -756,9 +766,9 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen8_pte_t *pt_vaddr;
-   unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
-   unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
-   unsigned pte = start  GEN8_PTE_SHIFT  GEN8_PTE_MASK;
+   unsigned pdpe = gen8_pdpe_index(start);
+   unsigned pde = gen8_pde_index(start);
+   unsigned pte = gen8_pte_index(start);
 
pt_vaddr = NULL;
 
@@ -776,7 +786,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
kunmap_px(ppgtt, pt_vaddr);
pt_vaddr = NULL;
if (++pde == I915_PDES) {
-   pdpe++;
+   if (++pdpe == I915_PDPES_PER_PDP(vm-dev))
+   break;
pde = 0;
}
pte = 0;
@@ -795,11 +806,23 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
 {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
-   struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
struct sg_page_iter sg_iter;
 

[Intel-gfx] [PATCH v6 11/19] drm/i915/gen8: Initialize PDPs and PML4

2015-07-29 Thread Michel Thierry
Similar to PDs, while setting up a page directory pointer, make all entries
of the pdp point to the scratch pd before mapping (and make all its entries
point to the scratch page); this is to be safe in case of out-of-bounds
access or proactive prefetch.

Also add a scratch pdp, which the PML4 entries point to.

v2: Handle scratch_pdp allocation failure correctly, and keep
initialize_px functions together (Akash)
v3: Rebase after Mika's ppgtt cleanup / scratch merge patch series. Rely on
the added macros to initialize the pdps.
v4: Rebase after final merged version of Mika's ppgtt/scratch patches
(and removed commit message part related to v3).
v5: Update commit message to also mention PML4 table initialization and
the new scratch pdp (Akash).

Suggested-by: Akash Goel akash.g...@intel.com
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +
 drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
 2 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7070d42..73cfe56 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -602,6 +602,27 @@ static void free_pdp(struct drm_device *dev,
}
 }
 
+static void gen8_initialize_pdp(struct i915_address_space *vm,
+   struct i915_page_directory_pointer *pdp)
+{
+   gen8_ppgtt_pdpe_t scratch_pdpe;
+
+   scratch_pdpe = gen8_pdpe_encode(px_dma(vm-scratch_pd), I915_CACHE_LLC);
+
+   fill_px(vm-dev, pdp, scratch_pdpe);
+}
+
+static void gen8_initialize_pml4(struct i915_address_space *vm,
+struct i915_pml4 *pml4)
+{
+   gen8_ppgtt_pml4e_t scratch_pml4e;
+
+   scratch_pml4e = gen8_pml4e_encode(px_dma(vm-scratch_pdp),
+ I915_CACHE_LLC);
+
+   fill_px(vm-dev, pml4, scratch_pml4e);
+}
+
 static void
 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
  struct i915_page_directory_pointer *pdp,
@@ -863,8 +884,20 @@ static int gen8_init_scratch(struct i915_address_space *vm)
return PTR_ERR(vm-scratch_pd);
}
 
+   if (USES_FULL_48BIT_PPGTT(dev)) {
+   vm-scratch_pdp = alloc_pdp(dev);
+   if (IS_ERR(vm-scratch_pdp)) {
+   free_pd(dev, vm-scratch_pd);
+   free_pt(dev, vm-scratch_pt);
+   free_scratch_page(dev, vm-scratch_page);
+   return PTR_ERR(vm-scratch_pdp);
+   }
+   }
+
gen8_initialize_pt(vm, vm-scratch_pt);
gen8_initialize_pd(vm, vm-scratch_pd);
+   if (USES_FULL_48BIT_PPGTT(dev))
+   gen8_initialize_pdp(vm, vm-scratch_pdp);
 
return 0;
 }
@@ -873,6 +906,8 @@ static void gen8_free_scratch(struct i915_address_space *vm)
 {
struct drm_device *dev = vm-dev;
 
+   if (USES_FULL_48BIT_PPGTT(dev))
+   free_pdp(dev, vm-scratch_pdp);
free_pd(dev, vm-scratch_pd);
free_pt(dev, vm-scratch_pt);
free_scratch_page(dev, vm-scratch_page);
@@ -1074,6 +1109,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct 
i915_address_space *vm,
if (IS_ERR(pdp))
goto unwind_out;
 
+   gen8_initialize_pdp(vm, pdp);
pml4-pdps[pml4e] = pdp;
__set_bit(pml4e, new_pdps);
trace_i915_page_directory_pointer_entry_alloc(vm,
@@ -1353,6 +1389,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
if (ret)
goto free_scratch;
 
+   gen8_initialize_pml4(ppgtt-base, ppgtt-pml4);
+
ppgtt-base.total = 1ULL  48;
ppgtt-switch_mm = gen8_48b_mm_switch;
} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 
b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 11d44b3..70c50e7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -278,6 +278,7 @@ struct i915_address_space {
struct i915_page_scratch *scratch_page;
struct i915_page_table *scratch_pt;
struct i915_page_directory *scratch_pd;
+   struct i915_page_directory_pointer *scratch_pdp; /* GEN8+  48b PPGTT */
 
/**
 * List of objects currently involved in rendering.
-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 02/19] drm/i915/gen8: Make pdp allocation more dynamic

2015-07-29 Thread Michel Thierry
This transitional patch doesn't do much for the existing code. However,
it should make upcoming patches to use the full 48b address space a bit
easier.

v2: Renamed pdp_free to be similar to pd/pt (unmap_and_free_pdp).
v3: To facilitate testing, 48b mode will be available on Broadwell and
GEN9+, when i915.enable_ppgtt = 3.
v4: Rebase after s/page_tables/page_table/, added extra information
about 4-level page table formats and use IS_ENABLED macro.
v5: Check CONFIG_X86_64 instead of CONFIG_64BIT.
v6: Rebase after Mika's ppgtt cleanup / scratch merge patch series, and
follow his nomenclature in pdp functions (there is no alloc_pdp yet).
v7: Rebase after merged version of Mika's ppgtt cleanup patch series.
v8: Rebase after final merged version of Mika's ppgtt/scratch patches.
v9: Introduce PML4 (and 48-bit checks) until next patch (Akash).
v10: Also use test_bit to detect when pd/pt are already allocated (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 86 +
 drivers/gpu/drm/i915/i915_gem_gtt.h | 17 +---
 2 files changed, 80 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 189572d..28f3227 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -522,6 +522,43 @@ static void gen8_initialize_pd(struct i915_address_space 
*vm,
fill_px(vm-dev, pd, scratch_pde);
 }
 
+static int __pdp_init(struct drm_device *dev,
+ struct i915_page_directory_pointer *pdp)
+{
+   size_t pdpes = I915_PDPES_PER_PDP(dev);
+
+   pdp-used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+   if (!pdp-used_pdpes)
+   return -ENOMEM;
+
+   pdp-page_directory = kcalloc(pdpes, sizeof(*pdp-page_directory),
+ GFP_KERNEL);
+   if (!pdp-page_directory) {
+   kfree(pdp-used_pdpes);
+   /* the PDP might be the statically allocated top level. Keep it
+* as clean as possible */
+   pdp-used_pdpes = NULL;
+   return -ENOMEM;
+   }
+
+   return 0;
+}
+
+static void __pdp_fini(struct i915_page_directory_pointer *pdp)
+{
+   kfree(pdp-used_pdpes);
+   kfree(pdp-page_directory);
+   pdp-page_directory = NULL;
+}
+
+static void free_pdp(struct drm_device *dev,
+struct i915_page_directory_pointer *pdp)
+{
+   __pdp_fini(pdp);
+}
+
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct drm_i915_gem_request *req,
  unsigned entry,
@@ -720,7 +757,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
container_of(vm, struct i915_hw_ppgtt, base);
int i;
 
-   for_each_set_bit(i, ppgtt-pdp.used_pdpes, GEN8_LEGACY_PDPES) {
+   for_each_set_bit(i, ppgtt-pdp.used_pdpes,
+I915_PDPES_PER_PDP(ppgtt-base.dev)) {
if (WARN_ON(!ppgtt-pdp.page_directory[i]))
continue;
 
@@ -729,6 +767,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
free_pd(ppgtt-base.dev, ppgtt-pdp.page_directory[i]);
}
 
+   free_pdp(ppgtt-base.dev, ppgtt-pdp);
gen8_free_scratch(vm);
 }
 
@@ -763,7 +802,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt 
*ppgtt,
 
gen8_for_each_pde(pt, pd, start, length, temp, pde) {
/* Don't reallocate page tables */
-   if (pt) {
+   if (test_bit(pde, pd-used_pdes)) {
/* Scratch is never allocated this way */
WARN_ON(pt == ppgtt-base.scratch_pt);
continue;
@@ -820,11 +859,12 @@ static int gen8_ppgtt_alloc_page_directories(struct 
i915_hw_ppgtt *ppgtt,
struct i915_page_directory *pd;
uint64_t temp;
uint32_t pdpe;
+   uint32_t pdpes = I915_PDPES_PER_PDP(dev);
 
-   WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
+   WARN_ON(!bitmap_empty(new_pds, pdpes));
 
gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
-   if (pd)
+   if (test_bit(pdpe, pdp-used_pdpes))
continue;
 
pd = alloc_pd(dev);
@@ -839,18 +879,19 @@ static int gen8_ppgtt_alloc_page_directories(struct 
i915_hw_ppgtt *ppgtt,
return 0;
 
 unwind_out:
-   for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
+   for_each_set_bit(pdpe, new_pds, pdpes)
free_pd(dev, pdp-page_directory[pdpe]);
 
return -ENOMEM;
 }
 
 static void
-free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)

[Intel-gfx] [PATCH v6 16/19] drm/i915/userptr: Kill user_size limit check

2015-07-29 Thread Michel Thierry
GTT was only 32b and its max value is 4GB. In order to allow objects
bigger than 4GB in 48b PPGTT, in i915_gem_userptr_ioctl we could check
against the max 48b range (1ULL << 48).

But since the check no longer applies, just kill the limit.

v2: Use the default ctx to infer the ppgtt max size (Akash).
v3: Just kill the limit, it was only there for early detection of an
error when used for execbuffer (Chris).

Cc: Akash Goel akash.g...@intel.com
Reviewed-by: Chris Wilson ch...@chris-wilson.co.uk
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 8fd431b..d11901d 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -813,7 +813,6 @@ static const struct drm_i915_gem_object_ops 
i915_gem_userptr_ops = {
 int
 i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file 
*file)
 {
-   struct drm_i915_private *dev_priv = dev-dev_private;
struct drm_i915_gem_userptr *args = data;
struct drm_i915_gem_object *obj;
int ret;
@@ -826,9 +825,6 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file
if (offset_in_page(args-user_ptr | args-user_size))
return -EINVAL;
 
-   if (args-user_size  dev_priv-gtt.base.total)
-   return -E2BIG;
-
if (!access_ok(args-flags  I915_USERPTR_READ_ONLY ? VERIFY_READ : 
VERIFY_WRITE,
   (char __user *)(unsigned long)args-user_ptr, 
args-user_size))
return -EFAULT;
-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 03/19] drm/i915/gen8: Abstract PDP usage

2015-07-29 Thread Michel Thierry
Up until now, ppgtt->pdp has always been the root of our page tables.
Legacy 32b addresses acted like it had 1 PDP with 4 PDPEs.

In preparation for 4 level page tables, we need to stop using ppgtt->pdp
directly unless we know it's what we want. The future structure will use
ppgtt->pml4 for the top level, and the pdp is just one of the entries
being pointed to by a pml4e. Places where this is not yet possible use a
temporary pdp local variable.

v2: Updated after dynamic page allocation changes.
v3: Rebase after s/page_tables/page_table/.
v4: Rebase after changes in Dynamic page table allocations patch.
v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
v6: Rebase after final merged version of Mika's ppgtt/scratch patches.
v7: Keep pagetable map in-line (and avoid unnecessary for_each_pde
loops), remove redundant ppgtt pointer in _alloc_pagetabs (Akash)
v8: Fix text indentation in _alloc_pagetabs/page_directories (Chris)
v9: Defer gen8_alloc_va_range_4lvl definition until 4lvl is implemented,
clean-up gen8_ppgtt_cleanup [pun intended] (Akash).

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 84 +++--
 1 file changed, 44 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 28f3227..bd56979 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -607,6 +607,7 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
 {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
+   struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
gen8_pte_t *pt_vaddr, scratch_pte;
unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
@@ -621,10 +622,10 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
struct i915_page_directory *pd;
struct i915_page_table *pt;
 
-   if (WARN_ON(!ppgtt-pdp.page_directory[pdpe]))
+   if (WARN_ON(!pdp-page_directory[pdpe]))
break;
 
-   pd = ppgtt-pdp.page_directory[pdpe];
+   pd = pdp-page_directory[pdpe];
 
if (WARN_ON(!pd-page_table[pde]))
break;
@@ -662,6 +663,7 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
 {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
+   struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
gen8_pte_t *pt_vaddr;
unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
@@ -675,7 +677,7 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
break;
 
if (pt_vaddr == NULL) {
-   struct i915_page_directory *pd = 
ppgtt-pdp.page_directory[pdpe];
+   struct i915_page_directory *pd = 
pdp-page_directory[pdpe];
struct i915_page_table *pt = pd-page_table[pde];
pt_vaddr = kmap_px(pt);
}
@@ -755,28 +757,29 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
 {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
+   struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
+   struct drm_device *dev = ppgtt-base.dev;
int i;
 
-   for_each_set_bit(i, ppgtt-pdp.used_pdpes,
-I915_PDPES_PER_PDP(ppgtt-base.dev)) {
-   if (WARN_ON(!ppgtt-pdp.page_directory[i]))
+   for_each_set_bit(i, pdp-used_pdpes, I915_PDPES_PER_PDP(dev)) {
+   if (WARN_ON(!pdp-page_directory[i]))
continue;
 
-   gen8_free_page_tables(ppgtt-base.dev,
- ppgtt-pdp.page_directory[i]);
-   free_pd(ppgtt-base.dev, ppgtt-pdp.page_directory[i]);
+   gen8_free_page_tables(dev, pdp-page_directory[i]);
+   free_pd(dev, pdp-page_directory[i]);
}
 
-   free_pdp(ppgtt-base.dev, ppgtt-pdp);
+   free_pdp(dev, pdp);
+
gen8_free_scratch(vm);
 }
 
 /**
  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
- * @ppgtt: Master ppgtt structure.
- * @pd:Page directory for this address range.
+ * @vm:Master vm structure.
+ * @pd:Page directory for this address range.
  * @start: Starting virtual address to begin allocations.
- * @length Size of the allocations.
+ * @length:Size of the allocations.
  * @new_pts:   Bitmap set by function with new allocations. Likely used by 

[Intel-gfx] [PATCH v6 05/19] drm/i915/gen8: Add dynamic page trace events

2015-07-29 Thread Michel Thierry
The dynamic page allocation patch series added the trace events for GEN6;
this patch adds them for GEN8.

v2: Consolidate pagetable/page_directory events
v3: Multiple rebases.
v4: Rebase after s/page_tables/page_table/.
v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
v6: Rebase after gen8_map_pagetable_range removal.
v7: Use generic page name (px) in DECLARE_EVENT_CLASS (Akash)
v8: Defer define of i915_page_directory_pointer_entry_alloc (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v3+)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |  6 ++
 drivers/gpu/drm/i915/i915_trace.h   | 24 
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f338a13..8c1db92 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -844,6 +844,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct 
i915_address_space *vm,
gen8_initialize_pt(vm, pt);
pd-page_table[pde] = pt;
__set_bit(pde, new_pts);
+   trace_i915_page_table_entry_alloc(vm, pde, start, 
GEN8_PDE_SHIFT);
}
 
return 0;
@@ -904,6 +905,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space 
*vm,
gen8_initialize_pd(vm, pd);
pdp-page_directory[pdpe] = pd;
__set_bit(pdpe, new_pds);
+   trace_i915_page_directory_entry_alloc(vm, pdpe, start, 
GEN8_PDPE_SHIFT);
}
 
return 0;
@@ -1053,6 +1055,10 @@ static int gen8_alloc_va_range(struct i915_address_space 
*vm,
/* Map the PDE to the page table */
page_directory[pde] = gen8_pde_encode(px_dma(pt),
  I915_CACHE_LLC);
+   trace_i915_page_table_entry_map(ppgtt-base, pde, pt,
+   gen8_pte_index(start),
+   gen8_pte_count(start, 
length),
+   GEN8_PTES);
 
/* NB: We haven't yet mapped ptes to pages. At this
 * point we're still relying on insert_entries() */
diff --git a/drivers/gpu/drm/i915/i915_trace.h 
b/drivers/gpu/drm/i915/i915_trace.h
index 2f34c47..f230d76 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -186,33 +186,41 @@ DEFINE_EVENT(i915_va, i915_va_alloc,
 TP_ARGS(vm, start, length, name)
 );
 
-DECLARE_EVENT_CLASS(i915_page_table_entry,
-   TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 
pde_shift),
-   TP_ARGS(vm, pde, start, pde_shift),
+DECLARE_EVENT_CLASS(i915_px_entry,
+   TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 
px_shift),
+   TP_ARGS(vm, px, start, px_shift),
 
TP_STRUCT__entry(
__field(struct i915_address_space *, vm)
-   __field(u32, pde)
+   __field(u32, px)
__field(u64, start)
__field(u64, end)
),
 
TP_fast_assign(
__entry-vm = vm;
-   __entry-pde = pde;
+   __entry-px = px;
__entry-start = start;
-   __entry-end = ((start + (1ULL  pde_shift))  ~((1ULL  
pde_shift)-1)) - 1;
+   __entry-end = ((start + (1ULL  px_shift))  ~((1ULL  
px_shift)-1)) - 1;
),
 
TP_printk(vm=%p, pde=%d (0x%llx-0x%llx),
- __entry-vm, __entry-pde, __entry-start, __entry-end)
+ __entry-vm, __entry-px, __entry-start, __entry-end)
 );
 
-DEFINE_EVENT(i915_page_table_entry, i915_page_table_entry_alloc,
+DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc,
 TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 
pde_shift),
 TP_ARGS(vm, pde, start, pde_shift)
 );
 
+DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc,
+  TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, 
u64 pdpe_shift),
+  TP_ARGS(vm, pdpe, start, pdpe_shift),
+
+  TP_printk(vm=%p, pdpe=%d (0x%llx-0x%llx),
+__entry-vm, __entry-px, __entry-start, 
__entry-end)
+);
+
 /* Avoid extra math because we only support two sizes. The format is defined by
  * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */
 #define TRACE_PT_SIZE(bits) \
-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 07/19] drm/i915/gen8: implement alloc/free for 4lvl

2015-07-29 Thread Michel Thierry
PML4 has no special attributes, and there will always be a PML4.
So simply initialize it at creation, and destroy it at the end.

The code for 4lvl is able to call into the existing 3lvl page table code
to handle all of the lower levels.

v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the
compiler happy. And define ret only in one place.
Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl.
v3: Use i915_dma_unmap_single instead of pci API. Fix a
couple of incorrect checks when unmapping pdp and pd pages (Akash).
v4: Call __pdp_fini also for 32b PPGTT. Clean up alloc_pdp param list.
v5: Prevent (harmless) out of range access in gen8_for_each_pml4e.
v6: Simplify alloc_vma_range_4lvl and gen8_ppgtt_init_common error
paths. (Akash)
v7: Rebase, s/gen8_ppgtt_free_*/gen8_ppgtt_cleanup_*/.
v8: Change location of pml4_init/fini. It will make next patches
cleaner.
v9: Rebase after Mika's ppgtt cleanup / scratch merge patch series, while
trying to reuse as much as possible for pdp alloc. pml4_init/fini
replaced by setup/cleanup_px macros.
v10: Rebase after Mika's merged ppgtt cleanup patch series.
v11: Rebase after final merged version of Mika's ppgtt/scratch
patches.
v12: Fix pdpe start value in trace (Akash)
v13: Define all 4lvl functions in this patch directly, instead of
previous patches, add i915_page_directory_pointer_entry_alloc here,
use test_bit to detect when pdp is already allocated (Akash).
v14: Move pdp allocation into a new gen8_ppgtt_alloc_page_dirpointers
function, as we do for pds and pts; move pd and pdp setup functions to
this patch (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 239 +---
 drivers/gpu/drm/i915/i915_gem_gtt.h |  15 ++-
 drivers/gpu/drm/i915/i915_trace.h   |   8 ++
 3 files changed, 245 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1a120a4..4179b80 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -210,6 +210,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
return pde;
 }
 
+#define gen8_pdpe_encode gen8_pde_encode
+#define gen8_pml4e_encode gen8_pde_encode
+
 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
 enum i915_cache_level level,
 bool valid, u32 unused)
@@ -559,12 +562,73 @@ static void __pdp_fini(struct i915_page_directory_pointer 
*pdp)
pdp-page_directory = NULL;
 }
 
+static struct
+i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
+{
+   struct i915_page_directory_pointer *pdp;
+   int ret = -ENOMEM;
+
+   WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
+
+   pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
+   if (!pdp)
+   return ERR_PTR(-ENOMEM);
+
+   ret = __pdp_init(dev, pdp);
+   if (ret)
+   goto fail_bitmap;
+
+   ret = setup_px(dev, pdp);
+   if (ret)
+   goto fail_page_m;
+
+   return pdp;
+
+fail_page_m:
+   __pdp_fini(pdp);
+fail_bitmap:
+   kfree(pdp);
+
+   return ERR_PTR(ret);
+}
+
 static void free_pdp(struct drm_device *dev,
 struct i915_page_directory_pointer *pdp)
 {
__pdp_fini(pdp);
-   if (USES_FULL_48BIT_PPGTT(dev))
+   if (USES_FULL_48BIT_PPGTT(dev)) {
+   cleanup_px(dev, pdp);
kfree(pdp);
+   }
+}
+
+static void
+gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
+ struct i915_page_directory_pointer *pdp,
+ struct i915_page_directory *pd,
+ int index)
+{
+   gen8_ppgtt_pdpe_t *page_directorypo;
+
+   if (!USES_FULL_48BIT_PPGTT(ppgtt-base.dev))
+   return;
+
+   page_directorypo = kmap_px(pdp);
+   page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+   kunmap_px(ppgtt, page_directorypo);
+}
+
+static void
+gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
+ struct i915_pml4 *pml4,
+ struct i915_page_directory_pointer *pdp,
+ int index)
+{
+   gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
+
+   WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt-base.dev));
+   pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+   kunmap_px(ppgtt, pagemap);
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -784,12 +848,9 @@ static void gen8_free_scratch(struct i915_address_space 
*vm)
free_scratch_page(dev, vm-scratch_page);
 }
 
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
+   struct i915_page_directory_pointer *pdp)
 

Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Dave Gordon

On 29/07/15 14:15, Chris Wilson wrote:

On Wed, Jul 29, 2015 at 01:10:23PM +, Gore, Tim wrote:

I don’t see how this implies a kernel bug. It seems like a test problem (my
subtest as it happens). I was unaware of Android systems with small swap
partitions (or indeed any swap at all). Not sure I can understand the logic of
such a tiny swap partition but given the situation, unless we can accurately
characterise the memory usage of the test in advance then we have to
either skip the test for small swap, or try to monitor memory usage in an
ongoing way during the test.


If the system has enough resources to run the test (that is enough
physical to run an individual batch plus enough swap to hold the rest),
then the test must not oom.
-Chris


The test is deliberately attempting to use enough memory to force some 
stuff out to swap, while not hitting a total OOM. That can be a very 
narrow window when the swapspace is small; and the test just guesses in 
advance how much will do the trick rather than gradually increasing its 
demands until it detects that stuff is being swapped.


So not a kernel bug, but something of a failure in the implementation of 
the test. Is there an interface it could use to /detect and measure/ 
when stuff is swapped out?


.Dave.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 06/19] drm/i915/gen8: Add PML4 structure

2015-07-29 Thread Michel Thierry
Introduces the Page Map Level 4 (PML4), ie. the new top level structure
of the page tables.

To facilitate testing, 48b mode will be available on Broadwell and
GEN9+, when i915.enable_ppgtt = 3.

v2: Remove unnecessary CONFIG_X86_64 checks, ppgtt code is already
32/64-bit safe (Chris).
v3: Add goto free_scratch in temp 48-bit mode init code (Akash).

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |  3 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 38 -
 drivers/gpu/drm/i915/i915_gem_gtt.h | 26 -
 3 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 40fea41..0b5cbe8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2498,7 +2498,8 @@ struct drm_i915_cmd_table {
 #define HAS_HW_CONTEXTS(dev)   (INTEL_INFO(dev)-gen = 6)
 #define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)-gen = 8)
 #define USES_PPGTT(dev)(i915.enable_ppgtt)
-#define USES_FULL_PPGTT(dev)   (i915.enable_ppgtt == 2)
+#define USES_FULL_PPGTT(dev)   (i915.enable_ppgtt = 2)
+#define USES_FULL_48BIT_PPGTT(dev) (i915.enable_ppgtt == 3)
 
 #define HAS_OVERLAY(dev)   (INTEL_INFO(dev)-has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)
(INTEL_INFO(dev)-overlay_needs_physical)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8c1db92..1a120a4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -104,9 +104,12 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)
 {
bool has_aliasing_ppgtt;
bool has_full_ppgtt;
+   bool has_full_64bit_ppgtt;
 
has_aliasing_ppgtt = INTEL_INFO(dev)-gen = 6;
has_full_ppgtt = INTEL_INFO(dev)-gen = 7;
+   has_full_64bit_ppgtt = (IS_BROADWELL(dev) ||
+   INTEL_INFO(dev)-gen = 9)  false; /* FIXME: 
64b */
 
if (intel_vgpu_active(dev))
has_full_ppgtt = false; /* emulation is too hard */
@@ -125,6 +128,9 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)
if (enable_ppgtt == 2  has_full_ppgtt)
return 2;
 
+   if (enable_ppgtt == 3  has_full_64bit_ppgtt)
+   return 3;
+
 #ifdef CONFIG_INTEL_IOMMU
/* Disable ppgtt on SNB if VT-d is on. */
if (INTEL_INFO(dev)-gen == 6  intel_iommu_gfx_mapped) {
@@ -557,6 +563,8 @@ static void free_pdp(struct drm_device *dev,
 struct i915_page_directory_pointer *pdp)
 {
__pdp_fini(pdp);
+   if (USES_FULL_48BIT_PPGTT(dev))
+   kfree(pdp);
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -686,9 +694,6 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
pt_vaddr = NULL;
 
for_each_sg_page(pages-sgl, sg_iter, pages-nents, 0) {
-   if (WARN_ON(pdpe = GEN8_LEGACY_PDPES))
-   break;
-
if (pt_vaddr == NULL) {
struct i915_page_directory *pd = 
pdp-page_directory[pdpe];
struct i915_page_table *pt = pd-page_table[pde];
@@ -1102,14 +1107,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
return ret;
 
ppgtt-base.start = 0;
-   ppgtt-base.total = 1ULL  32;
-   if (IS_ENABLED(CONFIG_X86_32))
-   /* While we have a proliferation of size_t variables
-* we cannot represent the full ppgtt size on 32bit,
-* so limit it to the same size as the GGTT (currently
-* 2GiB).
-*/
-   ppgtt-base.total = to_i915(ppgtt-base.dev)-gtt.base.total;
ppgtt-base.cleanup = gen8_ppgtt_cleanup;
ppgtt-base.allocate_va_range = gen8_alloc_va_range;
ppgtt-base.insert_entries = gen8_ppgtt_insert_entries;
@@ -1119,10 +1116,25 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
ppgtt-switch_mm = gen8_mm_switch;
 
-   ret = __pdp_init(false, ppgtt-pdp);
+   if (!USES_FULL_48BIT_PPGTT(ppgtt-base.dev)) {
+   ret = __pdp_init(false, ppgtt-pdp);
 
-   if (ret)
+   if (ret)
+   goto free_scratch;
+
+   ppgtt-base.total = 1ULL  32;
+   if (IS_ENABLED(CONFIG_X86_32))
+   /* While we have a proliferation of size_t variables
+* we cannot represent the full ppgtt size on 32bit,
+* so limit it to the same size as the GGTT (currently
+* 2GiB).
+*/
+   ppgtt-base.total = 
to_i915(ppgtt-base.dev)-gtt.base.total;
+   } else {
+   ppgtt-base.total = 1ULL  48;
+   ret = -EPERM; /* Not yet implemented */
  

Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Morton, Derek J


-Original Message-
From: Gordon, David S 
Sent: Wednesday, July 29, 2015 4:15 PM
To: Chris Wilson; Gore, Tim; Morton, Derek J; intel-gfx@lists.freedesktop.org; 
Wood, Thomas
Subject: Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase 
min swap required

On 29/07/15 14:15, Chris Wilson wrote:
 On Wed, Jul 29, 2015 at 01:10:23PM +, Gore, Tim wrote:
 I don’t see how this implies a kernel bug. It seems like a test 
 problem (my subtest as it happens). I was unaware of Android systems 
 with small swap partitions (or indeed any swap at all). Not sure I 
 can understand the logic of such a tiny swap partition but given the 
 situation, unless we can accurately characterise the memory usage of 
 the test in advance then we have to either skip the test for small 
 swap, or try to monitor memory usage in an ongoing way during the test.

 If the system has enough resources to run the test (that is enough 
 physical to run an individual batch plus enough swap to hold the 
 rest), then the test must not oom.
 -Chris

The test is deliberately attempting to use enough memory to force some stuff 
out to swap, while not hitting a total OOM. That can be a very narrow window 
when the swapspace is small; and the test just guesses in advance how much 
will do the trick rather than gradually increasing its demands until it 
detects that stuff is being swapped.

So not a kernel bug, but something of a failure in the implementation of the 
test. Is there an interface it could use to /detect and measure/ when stuff is 
swapped out?

And there is also the problem that when stuff is swapped in and out that we 
have no control over what goes into the swap memory. It may well be for some 
other process that is asleep and be nothing to do with the running test.

I could rewrite the subtest to allocate a big lump of memory to fill physical 
memory, then allocate the swap/2 worth of buffer objects but there is no 
guarantee the buffer objects will go into swap rather than something else.

Also the actual workings of the OOM killer are a black art. When exactly does 
it start killing processes? There does not appear to be a fixed line that could 
be monitored with intel_get_avail_ram_mb(). And anyway how does 
intel_get_avail_ram_mb() interact with swap memory? Does it include swap 
memory? In which case the subtest would always fail. If not how can we tell 
when swap memory is filling up? Can intel_get_avail_ram_mb() go up due to the 
system swapping memory out while available swap memory goes down? The subtest 
is using intel_get_total_swap_mb(). What if there is already stuff in swap? The 
sysinfo structure appears to have a freeswap field.

How about I try the following:
1. Add an intel_get_avail_swap_mb() function
2. Update the test to allocate buffers until half of swap is used then do the 
blits. (no idea how slow calling intel_get_avail_swap_mb repeatedly will be)

//Derek


.Dave.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 13/19] drm/i915/gen8: Add ppgtt info and debug_dump

2015-07-29 Thread Michel Thierry
v2: Clean up patch after rebases.
v3: gen8_dump_ppgtt for 32b and 48b PPGTT.
v4: Use used_pml4es/pdpes (Akash).
v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
v6: Rely on used_px bits instead of null checking (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
 drivers/gpu/drm/i915/i915_debugfs.c | 18 
 drivers/gpu/drm/i915/i915_gem_gtt.c | 84 +
 2 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 23a69307..b6f1a13 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2250,7 +2250,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct 
drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev-dev_private;
struct intel_engine_cs *ring;
-   struct drm_file *file;
int i;
 
if (INTEL_INFO(dev)-gen == 6)
@@ -2273,13 +2272,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct 
drm_device *dev)
ppgtt-debug_dump(ppgtt, m);
}
 
-   list_for_each_entry_reverse(file, dev-filelist, lhead) {
-   struct drm_i915_file_private *file_priv = file-driver_priv;
-
-   seq_printf(m, proc: %s\n,
-  get_pid_task(file-pid, PIDTYPE_PID)-comm);
-   idr_for_each(file_priv-context_idr, per_file_ctx, m);
-   }
seq_printf(m, ECOCHK: 0x%08x\n, I915_READ(GAM_ECOCHK));
 }
 
@@ -2288,6 +2280,7 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
struct drm_info_node *node = m-private;
struct drm_device *dev = node-minor-dev;
struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_file *file;
 
int ret = mutex_lock_interruptible(dev-struct_mutex);
if (ret)
@@ -2299,6 +2292,15 @@ static int i915_ppgtt_info(struct seq_file *m, void 
*data)
else if (INTEL_INFO(dev)-gen = 6)
gen6_ppgtt_info(m, dev);
 
+   list_for_each_entry_reverse(file, dev-filelist, lhead) {
+   struct drm_i915_file_private *file_priv = file-driver_priv;
+
+   seq_printf(m, \nproc: %s\n,
+  get_pid_task(file-pid, PIDTYPE_PID)-comm);
+   idr_for_each(file_priv-context_idr, per_file_ctx,
+(void *)(unsigned long)m);
+   }
+
intel_runtime_pm_put(dev_priv);
mutex_unlock(dev-struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 73cfe56..0d7c7c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1361,6 +1361,89 @@ static int gen8_alloc_va_range(struct i915_address_space 
*vm,
return gen8_alloc_va_range_3lvl(vm, ppgtt-pdp, start, length);
 }
 
+static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
+ uint64_t start, uint64_t length,
+ gen8_pte_t scratch_pte,
+ struct seq_file *m)
+{
+   struct i915_page_directory *pd;
+   uint64_t temp;
+   uint32_t pdpe;
+
+   gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+   struct i915_page_table *pt;
+   uint64_t pd_len = length;
+   uint64_t pd_start = start;
+   uint32_t pde;
+
+   if (!test_bit(pdpe, pdp-used_pdpes))
+   continue;
+
+   seq_printf(m, \tPDPE #%d\n, pdpe);
+   gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+   uint32_t  pte;
+   gen8_pte_t *pt_vaddr;
+
+   if (!test_bit(pde, pd-used_pdes))
+   continue;
+
+   pt_vaddr = kmap_px(pt);
+   for (pte = 0; pte  GEN8_PTES; pte += 4) {
+   uint64_t va =
+   (pdpe  GEN8_PDPE_SHIFT) |
+   (pde  GEN8_PDE_SHIFT) |
+   (pte  GEN8_PTE_SHIFT);
+   int i;
+   bool found = false;
+
+   for (i = 0; i  4; i++)
+   if (pt_vaddr[pte + i] != scratch_pte)
+   found = true;
+   if (!found)
+   continue;
+
+   seq_printf(m, \t\t0x%llx [%03d,%03d,%04d]: =, 
va, pdpe, pde, pte);
+   for (i = 0; i  4; i++) {
+   if (pt_vaddr[pte + i] != scratch_pte)
+   seq_printf(m,  %llx, 
pt_vaddr[pte + i]);
+   

[Intel-gfx] [PATCH v6 09/19] drm/i915/gen8: Pass sg_iter through pte inserts

2015-07-29 Thread Michel Thierry
As a step towards implementing 4 levels, while not discarding the
existing pte insert functions, we need to pass the sg_iter through.
The current function understands to the page directory granularity.
An object's pages may span the page directory, and so using the iter
directly as we write the PTEs allows the iterator to stay coherent
through a VMA insert operation spanning multiple page table levels.

v2: Rebase after s/page_tables/page_table/.
v3: Rebase after Mika's ppgtt cleanup / scratch merge patch series;
updated commit message (s/map/insert).

Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c6c8af7..7c024e98 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -749,7 +749,7 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
 static void
 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
  struct i915_page_directory_pointer *pdp,
- struct sg_table *pages,
+ struct sg_page_iter *sg_iter,
  uint64_t start,
  enum i915_cache_level cache_level)
 {
@@ -759,11 +759,10 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space 
*vm,
unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
unsigned pte = start  GEN8_PTE_SHIFT  GEN8_PTE_MASK;
-   struct sg_page_iter sg_iter;
 
pt_vaddr = NULL;
 
-   for_each_sg_page(pages-sgl, sg_iter, pages-nents, 0) {
+   while (__sg_page_iter_next(sg_iter)) {
if (pt_vaddr == NULL) {
struct i915_page_directory *pd = 
pdp-page_directory[pdpe];
struct i915_page_table *pt = pd-page_table[pde];
@@ -771,7 +770,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
}
 
pt_vaddr[pte] =
-   gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
+   gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
cache_level, true);
if (++pte == GEN8_PTES) {
kunmap_px(ppgtt, pt_vaddr);
@@ -797,8 +796,10 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
+   struct sg_page_iter sg_iter;
 
-   gen8_ppgtt_insert_pte_entries(vm, pdp, pages, start, cache_level);
+   __sg_page_iter_start(sg_iter, pages-sgl, sg_nents(pages-sgl), 0);
+   gen8_ppgtt_insert_pte_entries(vm, pdp, sg_iter, start, cache_level);
 }
 
 static void gen8_free_page_tables(struct drm_device *dev,
-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 15/19] drm/i915: batch_obj vm offset must be u64

2015-07-29 Thread Michel Thierry
Otherwise it can overflow in 48-bit mode, and cause an incorrect
exec_start.

Before commit 5f19e2bffa63a91cd4ac1adcec648e14a44277ce (drm/i915: Merged
the many do_execbuf() parameters into a structure), it was already an u64.

Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 33926d9..ed2fbcd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1674,7 +1674,7 @@ struct i915_execbuffer_params {
struct drm_file *file;
uint32_tdispatch_flags;
uint32_targs_batch_start_offset;
-   uint32_tbatch_obj_vm_offset;
+   uint64_tbatch_obj_vm_offset;
struct intel_engine_cs  *ring;
struct drm_i915_gem_object  *batch_obj;
struct intel_context*ctx;
-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 00/19] 48-bit PPGTT

2015-07-29 Thread Michel Thierry
This clean-up version delays the 48-bit work to later patches and includes
more review comments from Akash and Chris. The first 5 patches prepare the
dynamic page allocation code to handle independent pdps, but no specific
code for 48-bit mode is added before the 5th patch.

In order to expand the GPU address space, a 4th level translation is added,
the Page Map Level 4 (PML4). This PML4 has 512 PML4 Entries (PML4E),
PML4[0-511], each pointing to a PDP. All the existing dynamic alloc
ppgtt functions are used, only adding the 4th level changes. I also
updated some remaining variables that were 32b only.

There are 2 hardware workarounds needed to allow correct operation with
48b addresses (Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset).
A flag (EXEC_OBJECT_SUPPORTS_48B_ADDRESS) will indicate if a given object can
be allocated outside the first 4 PDPs; if not, the end range is forced to 4GB.
Also, more objects now use the DRM_MM_CREATE_TOP flag. To maintain
compatibility, in libdrm I added a new drm_intel_bo_emit_reloc_48bit function
that will flag these objects, while the existing drm_intel_bo_emit_reloc
clears it.

Finally, this feature is only available in BDW and Gen9, requires LRC
submission mode (execlists) and it can be detected by i915.enable_ppgtt=3.

Also note that this expanded address space is only available for full
PPGTT, aliasing PPGTT and Global GTT remain 32-bit.

I'll resend the userland patches (libdrm/mesa) in a different patchset, there
haven't been changes on them, but they require a rebase. I will also expand the
ppgtt igt test per Chris's suggestions.

Michel Thierry (19):
  drm/i915: Remove unnecessary gen8_clamp_pd
  drm/i915/gen8: Make pdp allocation more dynamic
  drm/i915/gen8: Abstract PDP usage
  drm/i915/gen8: Generalize PTE writing for GEN8 PPGTT
  drm/i915/gen8: Add dynamic page trace events
  drm/i915/gen8: Add PML4 structure
  drm/i915/gen8: implement alloc/free for 4lvl
  drm/i915/gen8: Add 4 level switching infrastructure and lrc support
  drm/i915/gen8: Pass sg_iter through pte inserts
  drm/i915/gen8: Add 4 level support in insert_entries and clear_range
  drm/i915/gen8: Initialize PDPs and PML4
  drm/i915: Expand error state's address width to 64b
  drm/i915/gen8: Add ppgtt info and debug_dump
  drm/i915: object size needs to be u64
  drm/i915: batch_obj vm offset must be u64
  drm/i915/userptr: Kill user_size limit check
  drm/i915: Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset
  drm/i915/gen8: Flip the 48b switch
  drm/i915: Save some page table setup on repeated binds

 drivers/gpu/drm/i915/i915_debugfs.c|  18 +-
 drivers/gpu/drm/i915/i915_drv.h|  11 +-
 drivers/gpu/drm/i915/i915_gem.c|  30 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  13 +
 drivers/gpu/drm/i915/i915_gem_gtt.c| 665 -
 drivers/gpu/drm/i915/i915_gem_gtt.h|  64 ++-
 drivers/gpu/drm/i915/i915_gem_userptr.c|   4 -
 drivers/gpu/drm/i915/i915_gpu_error.c  |  24 +-
 drivers/gpu/drm/i915/i915_params.c |   2 +-
 drivers/gpu/drm/i915/i915_reg.h|   1 +
 drivers/gpu/drm/i915/i915_trace.h  |  32 +-
 drivers/gpu/drm/i915/intel_lrc.c   |  60 ++-
 include/uapi/drm/i915_drm.h|   3 +-
 13 files changed, 747 insertions(+), 180 deletions(-)

-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 12/19] drm/i915: Expand error state's address width to 64b

2015-07-29 Thread Michel Thierry
v2: For semaphore errors, object is mapped to GGTT and offset will not
be > 4GB, print only lower 32-bits (Akash)
v3: Print gtt_offset in groups of 32-bit (Chris)

Cc: Akash Goel akash.g...@intel.com
Cc: Chris Wilson ch...@chris-wilson.co.uk
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h   |  4 ++--
 drivers/gpu/drm/i915/i915_gpu_error.c | 24 ++--
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0b5cbe8..33926d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -546,7 +546,7 @@ struct drm_i915_error_state {
 
struct drm_i915_error_object {
int page_count;
-   u32 gtt_offset;
+   u64 gtt_offset;
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
@@ -572,7 +572,7 @@ struct drm_i915_error_state {
u32 size;
u32 name;
u32 rseqno[I915_NUM_RINGS], wseqno;
-   u32 gtt_offset;
+   u64 gtt_offset;
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6f42569..f79c952 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -197,8 +197,9 @@ static void print_error_buffers(struct 
drm_i915_error_state_buf *m,
err_printf(m,   %s [%d]:\n, name, count);
 
while (count--) {
-   err_printf(m, %08x %8u %02x %02x [ ,
-  err-gtt_offset,
+   err_printf(m, %08x_%08x %8u %02x %02x [ ,
+  upper_32_bits(err-gtt_offset),
+  lower_32_bits(err-gtt_offset),
   err-size,
   err-read_domains,
   err-write_domain);
@@ -426,15 +427,17 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
err_printf(m,  (submitted by %s [%d]),
   error-ring[i].comm,
   error-ring[i].pid);
-   err_printf(m,  --- gtt_offset = 0x%08x\n,
-  obj-gtt_offset);
+   err_printf(m,  --- gtt_offset = 0x%08x %08x\n,
+  upper_32_bits(obj-gtt_offset),
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
 
obj = error-ring[i].wa_batchbuffer;
if (obj) {
err_printf(m, %s (w/a) --- gtt_offset = 0x%08x\n,
-  dev_priv-ring[i].name, obj-gtt_offset);
+  dev_priv-ring[i].name,
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
 
@@ -453,14 +456,14 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
if ((obj = error-ring[i].ringbuffer)) {
err_printf(m, %s --- ringbuffer = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
 
if ((obj = error-ring[i].hws_page)) {
err_printf(m, %s --- HW Status = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
offset = 0;
for (elt = 0; elt  PAGE_SIZE/16; elt += 4) {
err_printf(m, [%04x] %08x %08x %08x %08x\n,
@@ -476,13 +479,14 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
if ((obj = error-ring[i].ctx)) {
err_printf(m, %s --- HW Context = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
}
 
if ((obj = error-semaphore_obj)) {
-   err_printf(m, Semaphore page = 0x%08x\n, obj-gtt_offset);
+   err_printf(m, Semaphore page = 0x%08x\n,
+  lower_32_bits(obj-gtt_offset));
for (elt = 0; elt  PAGE_SIZE/16; elt += 4) {
err_printf(m, [%04x] 

[Intel-gfx] [PATCH v6 01/19] drm/i915: Remove unnecessary gen8_clamp_pd

2015-07-29 Thread Michel Thierry
gen8_clamp_pd clamps to the next page directory boundary, but the macro
gen8_for_each_pde already has a check to stop at the page directory
boundary.

Furthermore, i915_pte_count also restricts to the next page table
boundary.

v2: Rebase after Mika's ppgtt cleanup / scratch merge patch series.

Suggested-by: Akash Goel akash.g...@intel.com
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h | 11 ---
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c2a291e..189572d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -955,7 +955,7 @@ static int gen8_alloc_va_range(struct i915_address_space 
*vm,
gen8_for_each_pdpe(pd, ppgtt-pdp, start, length, temp, pdpe) {
gen8_pde_t *const page_directory = kmap_px(pd);
struct i915_page_table *pt;
-   uint64_t pd_len = gen8_clamp_pd(start, length);
+   uint64_t pd_len = length;
uint64_t pd_start = start;
uint32_t pde;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 
b/drivers/gpu/drm/i915/i915_gem_gtt.h
index e1cfa29..d5bf953 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -444,17 +444,6 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
 temp = min(temp, length),  \
 start += temp, length -= temp)
 
-/* Clamp length to the next page_directory boundary */
-static inline uint64_t gen8_clamp_pd(uint64_t start, uint64_t length)
-{
-   uint64_t next_pd = ALIGN(start + 1, 1  GEN8_PDPE_SHIFT);
-
-   if (next_pd  (start + length))
-   return length;
-
-   return next_pd - start;
-}
-
 static inline uint32_t gen8_pte_index(uint64_t address)
 {
return i915_pte_index(address, GEN8_PDE_SHIFT);
-- 
2.4.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 2/5] drm/i915: Unify execlist and legacy request life-cycles

2015-07-29 Thread Nick Hoath

On 09/07/2015 12:12, Chris Wilson wrote:

On Thu, Jul 09, 2015 at 11:57:41AM +0100, Nick Hoath wrote:

There is a desire to simplify the i915 driver by reducing the number of
different code paths introduced by the LRC / execlists support.  As the
execlists request is now part of the gem request it is possible and
desirable to unify the request life-cycles for execlist and legacy
requests.

Added a context complete flag to a request which gets set during the
context switch interrupt.

Added a function i915_gem_request_retireable().  A request is considered
retireable if its seqno passed (i.e. the request has completed) and either
it was never submitted to the ELSP or its context completed.  This ensures
that context save is carried out before the last request for a context is
considered retireable.  retire_requests_ring() now uses
i915_gem_request_retireable() rather than request_complete() when deciding
which requests to retire. Requests that were not waiting for a context
switch interrupt (either as a result of being merged into a following
request or by being a legacy request) will be considered retireable as
soon as their seqno has passed.


Nak. Just keep the design as requests only retire when seqno passes.


Removed the extra request reference held for the execlist request.

Removed intel_execlists_retire_requests() and all references to
intel_engine_cs.execlist_retired_req_list.

Moved context unpinning into retire_requests_ring() for now.  Further work
is pending for the context pinning - this patch should allow us to use the
active list to track context and ring buffer objects later.

Changed gen8_cs_irq_handler() so that notify_ring() is called when
contexts complete as well as when a user interrupt occurs so that
notification happens when a request is complete and context save has
finished.

v2: Rebase over the read-read optimisation changes


Any reason why you didn't review my patches to do this much more neatly?

Do you have a link for the relevant patches?

-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/3] drm/i915: Support DDI lane reversal for DP

2015-07-29 Thread Benjamin Tissoires
On Jul 29 2015 or thereabouts, Sivakumar Thulasimani wrote:
 why not detect reverse in intel_dp_detect/intel_hpd_pulse ? that way you can
 identify both lane count and reversal state without touching anything in the
 link training code. i am yet to upstream my changes for CHT that i can share
 if required that does the same in intel_dp_detect without touching any line
 in link training path.

With my current limited knowledge of the dp hotplug (and i915 driver) I
am not sure we could detect the reversed state without trying to train 1
lane only. I'd be glad to look at your changes and test them on my
system if you think that could help having a cleaner solution.

Cheers,
Benjamin

 
 On 7/28/2015 9:33 PM, Benjamin Tissoires wrote:
 The DP outputs connected through a USB Type-C port can have inverted
 lanes. To detect that case, we implement autodetection by training only
 the first lane; if it doesn't work, we assume that we need to invert
 the lanes.
 
 Tested on a Chromebook Pixel 2015 (samus) with a USB Type-C to HDMI
 adapter and a Dell 4K and some various regular monitors.
 
 Based on 2 patches from the ChromeOS tree by:
 Stéphane Marchesin marc...@chromium.org
 Todd Broch tbr...@chromium.org
 
 Signed-off-by: Benjamin Tissoires benjamin.tissoi...@redhat.com
 ---
   drivers/gpu/drm/i915/intel_ddi.c | 13 +
   drivers/gpu/drm/i915/intel_dp.c  | 36 
   drivers/gpu/drm/i915/intel_drv.h |  1 +
   3 files changed, 50 insertions(+)
 
 diff --git a/drivers/gpu/drm/i915/intel_ddi.c 
 b/drivers/gpu/drm/i915/intel_ddi.c
 index 9a40bfb..0b0c1ec 100644
 --- a/drivers/gpu/drm/i915/intel_ddi.c
 +++ b/drivers/gpu/drm/i915/intel_ddi.c
 @@ -2249,6 +2249,7 @@ static void intel_ddi_pre_enable(struct intel_encoder 
 *intel_encoder)
  enum port port = intel_ddi_get_encoder_port(intel_encoder);
  int type = intel_encoder-type;
  int hdmi_level;
 +bool reversed = false;
  if (type == INTEL_OUTPUT_EDP) {
  struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 @@ -2295,8 +2296,20 @@ static void intel_ddi_pre_enable(struct intel_encoder 
 *intel_encoder)
  if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
  struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 +if (IS_BROADWELL(dev)  type == INTEL_OUTPUT_DISPLAYPORT) {
 +intel_ddi_init_dp_buf_reg(intel_encoder);
 +reversed = intel_dp_is_reversed(intel_dp);
 +}
 +
  intel_ddi_init_dp_buf_reg(intel_encoder);
 +if (IS_BROADWELL(dev)) {
 +if (reversed)
 +intel_dp-DP |= DDI_BUF_PORT_REVERSAL;
 +else
 +intel_dp-DP = ~DDI_BUF_PORT_REVERSAL;
 +}
 +
  intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
  intel_dp_start_link_train(intel_dp);
  intel_dp_complete_link_train(intel_dp);
 diff --git a/drivers/gpu/drm/i915/intel_dp.c 
 b/drivers/gpu/drm/i915/intel_dp.c
 index b740987..18280cc 100644
 --- a/drivers/gpu/drm/i915/intel_dp.c
 +++ b/drivers/gpu/drm/i915/intel_dp.c
 @@ -3820,6 +3820,42 @@ intel_dp_complete_link_train(struct intel_dp 
 *intel_dp)
  intel_dp-DP = DP;
   }
 +bool intel_dp_is_reversed(struct intel_dp *intel_dp)
 +{
 +struct drm_encoder *encoder = dp_to_dig_port(intel_dp)-base.base;
 +struct drm_device *dev = encoder-dev;
 +struct drm_i915_private *dev_priv = dev-dev_private;
 +uint32_t DP = intel_dp-DP;
 +
 +/*
 + * Train with 1 lane. There is no guarantee that the monitor supports
 + * 2 or 4 lanes, and we wouldn't see any asymetricity with 4 lanes.
 + */
 +const uint8_t lane_count = 1;
 +bool reversed;
 +
 +if (!HAS_DDI(dev))
 +return false;
 +
 +DP = ~(DDI_BUF_PORT_REVERSAL | DDI_PORT_WIDTH(4));
 +DP |= DDI_PORT_WIDTH(lane_count);
 +
 +I915_WRITE(intel_dp-output_reg, DP);
 +POSTING_READ(intel_dp-output_reg);
 +udelay(600);
 +
 +if (!_intel_dp_start_link_train(intel_dp, lane_count, DP, true))
 +return true;
 +
 +reversed = !_intel_dp_complete_link_train(intel_dp, lane_count, DP, 
 true);
 +
 +/* clear training, we had only one lane */
 +intel_dp-train_set_valid = false;
 +
 +return reversed;
 +
 +}
 +
   void intel_dp_stop_link_train(struct intel_dp *intel_dp)
   {
  intel_dp_set_link_train(intel_dp, intel_dp-DP,
 diff --git a/drivers/gpu/drm/i915/intel_drv.h 
 b/drivers/gpu/drm/i915/intel_drv.h
 index 320c9e6..cba00c6 100644
 --- a/drivers/gpu/drm/i915/intel_drv.h
 +++ b/drivers/gpu/drm/i915/intel_drv.h
 @@ -1169,6 +1169,7 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 
 *crc);
   bool intel_dp_compute_config(struct intel_encoder *encoder,
   struct intel_crtc_state *pipe_config);
   bool intel_dp_is_edp(struct drm_device *dev, enum port port);
 +bool 

Re: [Intel-gfx] [PATCH 7/7] drm/i915: Dont -ETIMEDOUT on identical new and previous (count, crc).

2015-07-29 Thread Rafael Antognolli
On Wed, Jul 29, 2015 at 10:26:53AM +0200, Daniel Vetter wrote:
 On Tue, Jul 28, 2015 at 10:05:21PM +, Vivi, Rodrigo wrote:
  On Tue, 2015-07-28 at 13:25 -0700, Rafael Antognolli wrote:
   On Thu, Jul 23, 2015 at 04:35:50PM -0700, Rodrigo Vivi wrote:
By Vesa DP 1.2 spec TEST_CRC_COUNT is a 4 bit wrap counter which
increments each time the TEST_CRC_x_x are updated.

However if we are trying to verify the screen hasn't changed we get
same (count, crc) pair twice. Without this patch we would return
-ETIMEDOUT in this case.

So, if in 6 vblanks the pair (count, crc) hasn't changed we
return it anyway instead of returning error and let test case decide
if it was right or not.

Signed-off-by: Rodrigo Vivi rodrigo.v...@intel.com
   
   Looks good.
   
---
 drivers/gpu/drm/i915/intel_dp.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c 
b/drivers/gpu/drm/i915/intel_dp.c
index c7372a1..e99ec7a 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4028,6 +4028,7 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, 
u8 *crc)
u8 buf;
int count, ret;
int attempts = 6;
+   bool old_equal_new;
 
ret = intel_dp_sink_crc_start(intel_dp);
if (ret)
@@ -4042,6 +4043,7 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, 
u8 *crc)
goto stop;
}
count = buf  DP_TEST_COUNT_MASK;
+
/*
 * Count might be reset during the loop. In this case
 * last known count needs to be reset as well.
@@ -4053,17 +4055,24 @@ int intel_dp_sink_crc(struct intel_dp 
*intel_dp, u8 *crc)
ret = -EIO;
goto stop;
}
-   } while (--attempts  (count == 0 || (count == 
intel_dp-sink_crc.last_count 
-  
!memcmp(intel_dp-sink_crc.last_crc, crc,
-  6 * 
sizeof(u8);
+
+   old_equal_new = (count == intel_dp-sink_crc.last_count 

+!memcmp(intel_dp-sink_crc.last_crc, 
crc,
+6 * sizeof(u8)));
+
+   } while (--attempts  (count == 0 || old_equal_new));
 
intel_dp-sink_crc.last_count = buf  DP_TEST_COUNT_MASK;
memcpy(intel_dp-sink_crc.last_crc, crc, 6 * sizeof(u8));
 
if (attempts == 0) {
-   DRM_DEBUG_KMS(Panel is unable to calculate CRC after 6 
vblanks\n);
-   ret = -ETIMEDOUT;
-   goto stop;
+   if (old_equal_new) {
+   DRM_DEBUG_KMS(Unreliable Sink CRC counter: 
Current returned CRC is identical to the previous one\n);
   
   Isn't this line a little too long?
  
  I agree, but I had no idea how to make it shorter. I believe this long
  debug message is the only case where we can go over 80 characters in
  i915. but if it isn't true and/or have a suggestion how to make it
  shorter please let me know that I can change.
 
 dmesg output is explicitly an exception since breaking lines makes it much
 harder to grep for a line you spot in dmesg. Ofc 500 lines would be a bit
 too much, we're breaking those. But this one here is totally fine.

Nice, I never thought about being able to grep, but makes total sense.

 Remember, checkpatch is just suggestions mostly, not law.

I wasn't aware of it, but good to know that it exists. I'll check it out.

Reviewed-by: Rafael Antognolli rafael.antogno...@intel.com

  
   
+   } else {
+   DRM_ERROR(Panel is unable to calculate any CRC 
after 6 vblanks\n);
+   ret = -ETIMEDOUT;
+   goto stop;
+   }
}
 
 stop:
-- 
2.1.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
  
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 
 -- 
 Daniel Vetter
 Software Engineer, Intel Corporation
 http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Chris Wilson
On Wed, Jul 29, 2015 at 04:14:55PM +0100, Dave Gordon wrote:
 On 29/07/15 14:15, Chris Wilson wrote:
 On Wed, Jul 29, 2015 at 01:10:23PM +, Gore, Tim wrote:
 I don't see how this implies a kernel bug. It seems like a test problem (my
 subtest as it happens). I was unaware of Android systems with small swap
 partitions (or indeed any swap at all). Not sure I can understand the logic 
 of
 such a tiny swap partition but given the situation, unless we can accurately
 characterise the memory usage of the test in advance then we have to
 either skip the test for small swap, or try to monitor memory usage in an
 ongoing way during the test.
 
 If the system has enough resources to run the test (that is enough
 physical to run an individual batch plus enough swap to hold the rest),
 then the test must not oom.
 -Chris
 
 The test is deliberately attempting to use enough memory to force
 some stuff out to swap, while not hitting a total OOM. That can be a
 very narrow window when the swapspace is small; and the test just
 guesses in advance how much will do the trick rather than gradually
 increasing its demands until it detects that stuff is being swapped.
 
 So not a kernel bug, but something of a failure in the

Pardon? Which part of "we have enough physical and virtual to complete
the test, but an oom is triggered instead" is an incorrect assumption?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/3] drm/i915: hide errors when probing for a reverse display port

2015-07-29 Thread Benjamin Tissoires
On Jul 28 2015 or thereabouts, Chris Wilson wrote:
 On Tue, Jul 28, 2015 at 12:03:28PM -0400, Benjamin Tissoires wrote:
  We check the polarity of the attached dp, so it is normal to fail.
  Do not send errors to the users.
 
 if (probe) DRM_DEBUG else DRM_ERROR is fairly offensive.
 
 It strikes me that you could make each of these functions report the
 failure to the caller (have the caller even do some error handling!) and
 as part of that have the caller report an error and so demote all of
 these to DRM_DEBUG_KMS().

Yes, sorry for that. I will change it to return an actual error code and
the error string if I still need to use these functions given Sivakumar
ideas. 

 
 Who knows, actually doing some error handling may make monitor training
 more reliable! Or we may even get carried away and report the failure
 all the way back to userspace.

That would be a very good improvement indeed. But I can already tell you
that I will not do it by myself, I already have too much on my plate.
I'll do my share for this feature, but don't count on me for the whole
error handling rewrite :)

Cheers,
Benjamin

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/skl: revert duplicated WaBarrierPerformanceFixDisable:skl

2015-07-29 Thread Marc Herbert
With this simple git diff command one can see that skl_init_workarounds()
got two copies of WaBarrierPerformanceFixDisable:skl:

 git diff -U21 ca6e4405779e^1 ca6e4405779e 
drivers/gpu/drm/i915/intel_ringbuffer.c

This happened when the backmerge of drm-intel-fixes-2015-07-15
merged the same fix on both sides. Same fix but not identical enough for
git: with a different surrounding context; hence the code duplication.

This commit merely reverts the output of the git command above
 = the duplication introduced in the backmerge.

(This duplication was found while running git sanity checks on a
_linearized_ i915 forklift for ChromeOS.)

Signed-off-by: Marc Herbert marc.herb...@intel.com
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 177f7ed16cf0..1c14233d179f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1041,13 +1041,6 @@ static int skl_init_workarounds(struct intel_engine_cs 
*ring)
WA_SET_BIT_MASKED(HIZ_CHICKEN,
  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
-   if (INTEL_REVID(dev) == SKL_REVID_C0 ||
-   INTEL_REVID(dev) == SKL_REVID_D0)
-   /* WaBarrierPerformanceFixDisable:skl */
-   WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FENCE_DEST_SLM_DISABLE |
- HDC_BARRIER_PERFORMANCE_DISABLE);
-
if (INTEL_REVID(dev) = SKL_REVID_D0) {
/*
 *Use Force Non-Coherent whenever executing a 3D context. This
-- 
2.1.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC] drm/i915: Add sync framework support to execbuff IOCTL

2015-07-29 Thread Jesse Barnes
On 07/07/2015 02:15 AM, Tvrtko Ursulin wrote:
 
 On 07/06/2015 01:58 PM, John Harrison wrote:
 On 06/07/2015 10:29, Daniel Vetter wrote:
 On Fri, Jul 03, 2015 at 12:17:33PM +0100, Tvrtko Ursulin wrote:
 On 07/02/2015 04:55 PM, Chris Wilson wrote:
 It would be nice if we could reuse one seqno both for internal/external
 fences. If you need to expose a fence ordering within a timeline
 that is
 based on the creation stamp rather than execution stamp, it seems like
 we could just add such a stamp when creating the sync_pt and not worry
 about its relationship to the execution seqno.

 Doing so does expose that requests are reordered to userspace since the
 signalling timeline is not the same as userspace's ordered timeline.
 Not
 sure if that is a problem or not.

 Afaict the sync uapi is based on waiting for all of a set of fences to
 retire. It doesn't seem to rely on fence ordering (that is knowing that
 fence A will signal before fence B so it need only wait on fence B).

 Here's hoping that we can have both simplicity and efficiency...
 Jumping in with not even perfect understanding of everything here - but
 timeline business has always been confusing me. There is nothing in the
 uapi which needs it afaics and iirc there was some discussion at the
 time
 Jesse floated his patches that it can be removed. Based on that when I
 squashed his patches and ported them on top of John's request to fence
 conversion it ended up something like the below (manually edited a
 bit to
 be less noisy and some prep patches omitted):

 This implements the ioctl based uapi and indeed seqnos are not actually
 used in waits. So is this insufficient for some reason? (Other than that it
 does not implement the input fence side of things.)
 Yeah android syncpt on top of struct fence embedded in i915 request is
 what I'd have expected.
 The thing I'm not happy with in that plan is that it leaves the kernel
 driver at the mercy of user land applications. If we return a fence
 object to user land via a file descriptor (or indeed any other
 mechanism) then that fence object must be locked until user land closes
 the file. If the fence object is the one embedded within our request
 structure then that means user land is effectively locking our request
 structure. Given that more and more stuff is being attached to the
 request, that could be a fair bit of memory tied up that we can do
 nothing about. E.g. if a rogue/buggy application requests a fence be
 returned for every batch buffer submitted but never closes them.
 Whereas, if we go the route of a separate fence object specifically for
 user land then they can leak them like a sieve and we won't really care
 so much.
 
 I am starting to agree gradually with this view. Given all the
 complications, referencing requests for exporting via fds feels quite
 heavy-weight, with potentially unbound dependencies and more
 trickiness in the future, even if we agreed on referencing and locking
 details.
 
 Seqnos per context sounds like a significantly more light-weight and
 decoupled implementation.

I think this is the right long term direction as well; conceptually the
per-context seqnos make the most sense in light of scheduling, and they
let us keep things simple for sync pts as well.  Only question is, who
is signed up to make it all work?

Jesse

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm: Fixup locking WARNINGs in drm_mode_config_reset

2015-07-29 Thread Daniel Vetter
On Wed, Jul 29, 2015 at 11:15:23PM +0300, Laurent Pinchart wrote:
 Hi Daniel,
 
 Thank you for the patch.
 
 On Wednesday 29 July 2015 08:32:43 Daniel Vetter wrote:
  With
  
  commit 7a3f3d6667f5f9ffd1517f6b21d64bbf5312042c
  Author: Daniel Vetter daniel.vet...@ffwll.ch
  Date:   Thu Jul 9 23:44:28 2015 +0200
  
  drm: Check locking in drm_for_each_connector
  
  we started checking the locking in drm_for_each_connector but somehow
  I totally missed drm_mode_config_reset. There's no problem there since
  this function should only be called in single-threaded contexts
  (driver load or resume), so just wrap the loop with the right lock.
  
  v2: Drink coffee and all that ...
  
  Cc: Laurent Pinchart laurent.pinch...@ideasonboard.com
  Reported-by: Laurent Pinchart laurent.pinch...@ideasonboard.com
  Signed-off-by: Daniel Vetter daniel.vet...@intel.com
 
 This gets rid of the warning at driver load time with the rcar-du-drm driver.
 
 Tested-by: Laurent Pinchart laurent.pinch...@ideasonboard.com

Thanks for testing, applied patch to drm-misc.
 
  ---
   drivers/gpu/drm/drm_crtc.c | 2 ++
   1 file changed, 2 insertions(+)
  
  diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
  index c91c18b2b1d4..10c1a0f6680c 100644
  --- a/drivers/gpu/drm/drm_crtc.c
  +++ b/drivers/gpu/drm/drm_crtc.c
  @@ -5273,9 +5273,11 @@ void drm_mode_config_reset(struct drm_device *dev)
  if (encoder-funcs-reset)
  encoder-funcs-reset(encoder);
  
  +   mutex_lock(dev-mode_config.mutex);
  drm_for_each_connector(connector, dev)
  if (connector-funcs-reset)
  connector-funcs-reset(connector);
  +   mutex_unlock(dev-mode_config.mutex);
 
 Wouldn't it make sense to protect the whole function with the mode_config 
 mutex, given that it's documented as protecting the KMS related lists and 
 structures ?

Well that was correct ages ago I guess but since then we've split out tons
of other locks. It doesn't protect anything else really since those lists
are all static after driver load, and even here it's just to shut up the
WARNING (which is right for runtime in general and not harmful to grab the
lock really either).

I do have a plan to fix up kerneldoc, but I want to get the various
improvements collabora is working on in first.
-Daniel

 
   }
   EXPORT_SYMBOL(drm_mode_config_reset);
 
 -- 
 Regards,
 
 Laurent Pinchart
 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915/skl: Don't expose the top most plane on gen9 display

2015-07-29 Thread Konduru, Chandra


 -Original Message-
 From: Intel-gfx [mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf Of
 Damien Lespiau
 Sent: Thursday, July 16, 2015 9:08 AM
 To: intel-gfx@lists.freedesktop.org
 Subject: [Intel-gfx] [PATCH 1/2] drm/i915/skl: Don't expose the top most plane
 on gen9 display
 
 On SKL/BXT, the top most plane hardware is shared between the legacy
 cursor registers and an actual plane. Daniel and Ville don't want to
 expose 2 DRM planes and would rather expose a CURSOR plane that has all
 the usual plane properties, and that's a blocker for lifting the
 preliminary_hw_support flag.
 
 Unfortunately no one has had the time to finish this yet, but lifting the
 preliminary_hw_support flag is long overdue. As an intermediate solution
 we can merely not expose the top most plane.
 
 Cc: Imre Deak imre.d...@intel.com
 Signed-off-by: Damien Lespiau damien.lesp...@intel.com
 ---
  drivers/gpu/drm/i915/i915_dma.c | 16 
  1 file changed, 12 insertions(+), 4 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_dma.c
 b/drivers/gpu/drm/i915/i915_dma.c
 index 91925cb..96564cd 100644
 --- a/drivers/gpu/drm/i915/i915_dma.c
 +++ b/drivers/gpu/drm/i915/i915_dma.c
 @@ -726,11 +726,19 @@ static void intel_device_info_runtime_init(struct
 drm_device *dev)
 
   info = (struct intel_device_info *)dev_priv-info;
 
 + /*
 +  * Skylake and Broxton currently don't expose the topmost plane as its
 +  * use is exclusive with the legacy cursor and we only want to expose
 +  * one of those, not both. Until we can safely expose the topmost plane
 +  * as a DRM_PLANE_TYPE_CURSOR with all the features
 exposed/supported,
 +  * we don't expose the topmost plane at all to prevent ABI breakage
 +  * down the line.
 +  */
   if (IS_BROXTON(dev)) {
 - info-num_sprites[PIPE_A] = 3;
 - info-num_sprites[PIPE_B] = 3;
 - info-num_sprites[PIPE_C] = 2;
 - } else if (IS_VALLEYVIEW(dev) || INTEL_INFO(dev)-gen == 9)
 + info-num_sprites[PIPE_A] = 2;
 + info-num_sprites[PIPE_B] = 2;
 + info-num_sprites[PIPE_C] = 1;
 + } else if (IS_VALLEYVIEW(dev))
   for_each_pipe(dev_priv, pipe)
   info-num_sprites[pipe] = 2;

By the way, this is breaking kms_plane_scaling, where the 3rd plane is used
as a regular plane, as was previously allowed by the kernel.
There may be other igts also impacted by this change.

   else
 --
 2.1.0
 
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 05/10 v5] drm/i915: Enable GuC firmware log

2015-07-29 Thread Dave Gordon
From: Alex Dai yu@intel.com

Allocate a GEM object to hold GuC log data. A debugfs interface
(i915_guc_log_dump) is provided to print out the log content.

v2:
Add struct members at point of use [Chris Wilson]

v5:
Rebased

Issue: VIZ-4884
Signed-off-by: Alex Dai yu@intel.com
Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c| 29 +++
 drivers/gpu/drm/i915/i915_guc_submission.c | 46 ++
 drivers/gpu/drm/i915/intel_guc.h   |  1 +
 3 files changed, 76 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 5c8f53c..bc12c70 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2410,6 +2410,34 @@ static int i915_guc_load_status_info(struct seq_file *m, 
void *data)
return 0;
 }
 
+static int i915_guc_log_dump(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = m-private;
+   struct drm_device *dev = node-minor-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_i915_gem_object *log_obj = dev_priv-guc.log_obj;
+   u32 *log;
+   int i = 0, pg;
+
+   if (!log_obj)
+   return 0;
+
+   for (pg = 0; pg  log_obj-base.size / PAGE_SIZE; pg++) {
+   log = kmap_atomic(i915_gem_object_get_page(log_obj, pg));
+
+   for (i = 0; i  PAGE_SIZE / sizeof(u32); i += 4)
+   seq_printf(m, 0x%08x 0x%08x 0x%08x 0x%08x\n,
+  *(log + i), *(log + i + 1),
+  *(log + i + 2), *(log + i + 3));
+
+   kunmap_atomic(log);
+   }
+
+   seq_putc(m, '\n');
+
+   return 0;
+}
+
 static int i915_edp_psr_status(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m-private;
@@ -5104,6 +5132,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{i915_gem_hws_vebox, i915_hws_info, 0, (void *)VECS},
{i915_gem_batch_pool, i915_gem_batch_pool_info, 0},
{i915_guc_load_status, i915_guc_load_status_info, 0},
+   {i915_guc_log_dump, i915_guc_log_dump, 0},
{i915_frequency_info, i915_frequency_info, 0},
{i915_hangcheck_info, i915_hangcheck_info, 0},
{i915_drpc_info, i915_drpc_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 8ff59aa..669c889 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -79,6 +79,47 @@ static void gem_release_guc_obj(struct drm_i915_gem_object 
*obj)
drm_gem_object_unreference(obj-base);
 }
 
+static void guc_create_log(struct intel_guc *guc)
+{
+   struct drm_i915_private *dev_priv = guc_to_i915(guc);
+   struct drm_i915_gem_object *obj;
+   unsigned long offset;
+   uint32_t size, flags;
+
+   if (i915.guc_log_level  GUC_LOG_VERBOSITY_MIN)
+   return;
+
+   if (i915.guc_log_level  GUC_LOG_VERBOSITY_MAX)
+   i915.guc_log_level = GUC_LOG_VERBOSITY_MAX;
+
+   /* The first page is to save log buffer state. Allocate one
+* extra page for others in case for overlap */
+   size = (1 + GUC_LOG_DPC_PAGES + 1 +
+   GUC_LOG_ISR_PAGES + 1 +
+   GUC_LOG_CRASH_PAGES + 1)  PAGE_SHIFT;
+
+   obj = guc-log_obj;
+   if (!obj) {
+   obj = gem_allocate_guc_obj(dev_priv-dev, size);
+   if (!obj) {
+   /* logging will be off */
+   i915.guc_log_level = -1;
+   return;
+   }
+
+   guc-log_obj = obj;
+   }
+
+   /* each allocated unit is a page */
+   flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL |
+   (GUC_LOG_DPC_PAGES  GUC_LOG_DPC_SHIFT) |
+   (GUC_LOG_ISR_PAGES  GUC_LOG_ISR_SHIFT) |
+   (GUC_LOG_CRASH_PAGES  GUC_LOG_CRASH_SHIFT);
+
+   offset = i915_gem_obj_ggtt_offset(obj)  PAGE_SHIFT; /* in pages */
+   guc-log_flags = (offset  GUC_LOG_BUF_ADDR_SHIFT) | flags;
+}
+
 /*
  * Set up the memory resources to be shared with the GuC.  At this point,
  * we require just one object that can be mapped through the GGTT.
@@ -103,6 +144,8 @@ int i915_guc_submission_init(struct drm_device *dev)
 
ida_init(guc-ctx_ids);
 
+   guc_create_log(guc);
+
return 0;
 }
 
@@ -111,6 +154,9 @@ void i915_guc_submission_fini(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev-dev_private;
struct intel_guc *guc = dev_priv-guc;
 
+   gem_release_guc_obj(dev_priv-guc.log_obj);
+   guc-log_obj = NULL;
+
if (guc-ctx_pool_obj)
ida_destroy(guc-ctx_ids);
gem_release_guc_obj(guc-ctx_pool_obj);
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index be3cad8..5b51b05 100644
--- 

[Intel-gfx] [PATCH 07/10 v5] drm/i915: Interrupt routing for GuC submission

2015-07-29 Thread Dave Gordon
Turn on interrupt steering to route necessary interrupts to GuC.

v5:
Rebased

Issue: VIZ-4884
Signed-off-by: Alex Dai yu@intel.com
Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 drivers/gpu/drm/i915/i915_reg.h | 11 +--
 drivers/gpu/drm/i915/intel_guc_loader.c | 51 +
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ff58245..8e39a89 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1665,12 +1665,18 @@ enum skl_disp_power_wells {
 #define GFX_MODE_GEN7  0x0229c
 #define RING_MODE_GEN7(ring)   ((ring)-mmio_base+0x29c)
 #define   GFX_RUN_LIST_ENABLE  (115)
+#define   GFX_INTERRUPT_STEERING   (114)
 #define   GFX_TLB_INVALIDATE_EXPLICIT  (113)
 #define   GFX_SURFACE_FAULT_ENABLE (112)
 #define   GFX_REPLAY_MODE  (111)
 #define   GFX_PSMI_GRANULARITY (110)
 #define   GFX_PPGTT_ENABLE (19)
 
+#define   GFX_FORWARD_VBLANK_MASK  (35)
+#define   GFX_FORWARD_VBLANK_NEVER (05)
+#define   GFX_FORWARD_VBLANK_ALWAYS(15)
+#define   GFX_FORWARD_VBLANK_COND  (25)
+
 #define VLV_DISPLAY_BASE 0x18
 #define VLV_MIPI_BASE VLV_DISPLAY_BASE
 
@@ -5684,11 +5690,12 @@ enum skl_disp_power_wells {
 #define GEN8_GT_IIR(which) (0x44308 + (0x10 * (which)))
 #define GEN8_GT_IER(which) (0x4430c + (0x10 * (which)))
 
-#define GEN8_BCS_IRQ_SHIFT 16
 #define GEN8_RCS_IRQ_SHIFT 0
-#define GEN8_VCS2_IRQ_SHIFT 16
+#define GEN8_BCS_IRQ_SHIFT 16
 #define GEN8_VCS1_IRQ_SHIFT 0
+#define GEN8_VCS2_IRQ_SHIFT 16
 #define GEN8_VECS_IRQ_SHIFT 0
+#define GEN8_WD_IRQ_SHIFT 16
 
 #define GEN8_DE_PIPE_ISR(pipe) (0x44400 + (0x10 * (pipe)))
 #define GEN8_DE_PIPE_IMR(pipe) (0x44404 + (0x10 * (pipe)))
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c 
b/drivers/gpu/drm/i915/intel_guc_loader.c
index b371e15..fc9a996 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -79,6 +79,53 @@ const char *intel_guc_fw_status_repr(enum 
intel_guc_fw_status status)
}
 };
 
+static void direct_interrupts_to_host(struct drm_i915_private *dev_priv)
+{
+   struct intel_engine_cs *ring;
+   int i, irqs;
+
+   /* tell all command streamers NOT to forward interrupts and vblank to 
GuC */
+   irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
+   irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
+   for_each_ring(ring, dev_priv, i)
+   I915_WRITE(RING_MODE_GEN7(ring), irqs);
+
+   /* tell DE to send nothing to GuC */
+   I915_WRITE(DE_GUCRMR, ~0);
+
+   /* route all GT interrupts to the host */
+   I915_WRITE(GUC_BCS_RCS_IER, 0);
+   I915_WRITE(GUC_VCS2_VCS1_IER, 0);
+   I915_WRITE(GUC_WD_VECS_IER, 0);
+}
+
+static void direct_interrupts_to_guc(struct drm_i915_private *dev_priv)
+{
+   struct intel_engine_cs *ring;
+   int i, irqs;
+
+   /* tell all command streamers to forward interrupts and vblank to GuC */
+   irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, 
GFX_FORWARD_VBLANK_ALWAYS);
+   irqs |= _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
+   for_each_ring(ring, dev_priv, i)
+   I915_WRITE(RING_MODE_GEN7(ring), irqs);
+
+   /* tell DE to send (all) flip_done to GuC */
+   irqs = DERRMR_PIPEA_PRI_FLIP_DONE | DERRMR_PIPEA_SPR_FLIP_DONE |
+  DERRMR_PIPEB_PRI_FLIP_DONE | DERRMR_PIPEB_SPR_FLIP_DONE |
+  DERRMR_PIPEC_PRI_FLIP_DONE | DERRMR_PIPEC_SPR_FLIP_DONE;
+   /* Unmasked bits will cause GuC response message to be sent */
+   I915_WRITE(DE_GUCRMR, ~irqs);
+
+   /* route USER_INTERRUPT to Host, all others are sent to GuC. */
+   irqs = GT_RENDER_USER_INTERRUPT  GEN8_RCS_IRQ_SHIFT |
+  GT_RENDER_USER_INTERRUPT  GEN8_BCS_IRQ_SHIFT;
+   /* These three registers have the same bit definitions */
+   I915_WRITE(GUC_BCS_RCS_IER, ~irqs);
+   I915_WRITE(GUC_VCS2_VCS1_IER, ~irqs);
+   I915_WRITE(GUC_WD_VECS_IER, ~irqs);
+}
+
 static u32 get_gttype(struct drm_i915_private *dev_priv)
 {
/* XXX: GT type based on PCI device ID? field seems unused by fw */
@@ -344,6 +391,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
intel_guc_fw_status_repr(guc_fw-guc_fw_fetch_status),
intel_guc_fw_status_repr(guc_fw-guc_fw_load_status));
 
+   direct_interrupts_to_host(dev_priv);
i915_guc_submission_disable(dev);
 
if (guc_fw-guc_fw_fetch_status == GUC_FIRMWARE_NONE)
@@ -397,6 +445,7 @@ int intel_guc_ucode_load(struct drm_device *dev)
err = i915_guc_submission_enable(dev);
if (err)
goto fail;
+   direct_interrupts_to_guc(dev_priv);
}
 
return 0;
@@ -405,6 +454,7 @@ fail:
if (guc_fw-guc_fw_load_status == GUC_FIRMWARE_PENDING)

[Intel-gfx] [PATCH 10/10 v5] drm/i915: Enable GuC submission, where supported

2015-07-29 Thread Dave Gordon
Signed-off-by: Dave Gordon david.s.gor...@intel.com

v5:
Rebased

Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 drivers/gpu/drm/i915/i915_params.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index 5ae4b0a..9263a7e 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -52,7 +52,7 @@ struct i915_params i915 __read_mostly = {
.mmio_debug = 0,
.verbose_state_checks = 1,
.edp_vswing = 0,
-   .enable_guc_submission = false,
+   .enable_guc_submission = true,
.guc_log_level = -1,
 };
 
@@ -185,7 +185,7 @@ MODULE_PARM_DESC(edp_vswing,
 2=default swing(400mV)));
 
 module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, 
bool, 0400);
-MODULE_PARM_DESC(enable_guc_submission, Enable GuC submission 
(default:false));
+MODULE_PARM_DESC(enable_guc_submission, Enable GuC submission 
(default:true));
 
 module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
 MODULE_PARM_DESC(guc_log_level,
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 06/10 v5] drm/i915: Implementation of GuC submission client

2015-07-29 Thread Dave Gordon
A GuC client has its own doorbell and workqueue. It maintains the
doorbell cache line, process description object and work queue item.

A default guc_client is created for the i915 driver to use for
normal-priority in-order submission.

Note that the created client is not yet ready for use; doorbell
allocation will fail as we haven't yet linked the GuC's context
descriptor to the default contexts for each ring (see later patch).

v2:
Defer adding structure members until needed [Chris Wilson]
Rationalise type declarations [Chris Wilson]

v5:
Add GuC per-engine submission & seqno statistics.
Move wq locking to encompass both get_space() and add_item().
Take forcewake lock in host2guc_action() [Tom O'Rourke]

Issue: VIZ-4884
Signed-off-by: Alex Dai yu@intel.com
Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 652 +
 drivers/gpu/drm/i915/intel_guc.h   |  46 ++
 drivers/gpu/drm/i915/intel_guc_loader.c|  10 +
 3 files changed, 708 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 669c889..5437960 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -27,6 +27,517 @@
 #include intel_guc.h
 
 /**
+ * DOC: GuC Client
+ *
+ * i915_guc_client:
+ * We use the term client to avoid confusion with contexts. A i915_guc_client 
is
+ * equivalent to GuC object guc_context_desc. This context descriptor is
+ * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell
+ * and workqueue for it. Also the process descriptor (guc_process_desc), which
+ * is mapped to client space. So the client can write Work Item then ring the
+ * doorbell.
+ *
+ * To simplify the implementation, we allocate one gem object that contains all
+ * pages for doorbell, process descriptor and workqueue.
+ *
+ * The Scratch registers:
+ * There are 16 MMIO-based registers start from 0xC180. The kernel driver 
writes
+ * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
+ * triggers an interrupt on the GuC via another register write (0xC4C8).
+ * Firmware writes a success/fail code back to the action register after
+ * processes the request. The kernel driver polls waiting for this update and
+ * then proceeds.
+ * See host2guc_action()
+ *
+ * Doorbells:
+ * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
+ * mapped into process space.
+ *
+ * Work Items:
+ * There are several types of work items that the host may place into a
+ * workqueue, each with its own requirements and limitations. Currently only
+ * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
+ * represents in-order queue. The kernel driver packs ring tail pointer and an
+ * ELSP context descriptor dword into Work Item.
+ * See guc_add_workqueue_item()
+ *
+ */
+
+/*
+ * Read GuC command/status register (SOFT_SCRATCH_0)
+ * Return true if it contains a response rather than a command
+ */
+static inline bool host2guc_action_response(struct drm_i915_private *dev_priv,
+   u32 *status)
+{
+   u32 val = I915_READ(SOFT_SCRATCH(0));
+   *status = val;
+   return GUC2HOST_IS_RESPONSE(val);
+}
+
+static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
+{
+   struct drm_i915_private *dev_priv = guc_to_i915(guc);
+   u32 status;
+   int i;
+   int ret;
+
+   if (WARN_ON(len  1 || len  15))
+   return -EINVAL;
+
+   intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+   spin_lock(dev_priv-guc.host2guc_lock);
+
+   dev_priv-guc.action_count += 1;
+   dev_priv-guc.action_cmd = data[0];
+
+   for (i = 0; i  len; i++)
+   I915_WRITE(SOFT_SCRATCH(i), data[i]);
+
+   POSTING_READ(SOFT_SCRATCH(i - 1));
+
+   I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER);
+
+   /* No HOST2GUC command should take longer than 10ms */
+   ret = wait_for_atomic(host2guc_action_response(dev_priv, status), 10);
+   if (status != GUC2HOST_STATUS_SUCCESS) {
+   /*
+* Either the GuC explicitly returned an error (which
+* we convert to -EIO here) or no response at all was
+* received within the timeout limit (-ETIMEDOUT)
+*/
+   if (ret != -ETIMEDOUT)
+   ret = -EIO;
+
+   DRM_ERROR(GUC: host2guc action 0x%X failed. ret=%d 
+   status=0x%08X response=0x%08X\n,
+   data[0], ret, status,
+   I915_READ(SOFT_SCRATCH(15)));
+
+   dev_priv-guc.action_fail += 1;
+   dev_priv-guc.action_err = ret;
+   }
+   dev_priv-guc.action_status = status;
+
+   spin_unlock(dev_priv-guc.host2guc_lock);
+   

[Intel-gfx] [PATCH 04/10 v5] drm/i915: Prepare for GuC-based command submission

2015-07-29 Thread Dave Gordon
From: Alex Dai yu@intel.com

This adds the first of the data structures used to communicate with the
GuC (the pool of guc_context structures).

We create a GuC-specific wrapper round the GEM object allocator as all
GEM objects shared with the GuC must be pinned into GGTT space at an
address that is NOT in the range [0..WOPCM_TOP), as that range of GGTT
addresses is not accessible to the GuC (from the GuC's point of view,
it's permanently reserved for other objects such as the BootROM & SRAM).

Later, we will need to allocate additional GuC-sharable objects for the
submission client(s) and the GuC's debug log.

v2:
Remove redundant initialisation [Chris Wilson]
Defer adding struct members until needed [Chris Wilson]
Local functions should pass dev_priv rather than dev [Chris Wilson]

v5:
Invalidate GuC TLB after allocating and pinning a new object

Issue: VIZ-4884
Signed-off-by: Alex Dai yu@intel.com
Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 drivers/gpu/drm/i915/Makefile  |   3 +-
 drivers/gpu/drm/i915/i915_guc_submission.c | 118 +
 drivers/gpu/drm/i915/intel_guc.h   |   7 ++
 drivers/gpu/drm/i915/intel_guc_loader.c|  21 +
 4 files changed, 148 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_guc_submission.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cc359e0..ddb69f3 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -41,7 +41,8 @@ i915-y += i915_cmd_parser.o \
  intel_uncore.o
 
 # general-purpose microcontroller (GuC) support
-i915-y += intel_guc_loader.o
+i915-y += intel_guc_loader.o \
+ i915_guc_submission.o
 
 # autogenerated null render state
 i915-y += intel_renderstate_gen6.o \
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
new file mode 100644
index 000..8ff59aa
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#include linux/firmware.h
+#include linux/circ_buf.h
+#include i915_drv.h
+#include intel_guc.h
+
+/**
+ * gem_allocate_guc_obj() - Allocate gem object for GuC usage
+ * @dev:   drm device
+ * @size:  size of object
+ *
+ * This is a wrapper to create a gem obj. In order to use it inside GuC, the
+ * object needs to be pinned lifetime. Also we must pin it to gtt space other
+ * than [0, GUC_WOPCM_TOP) because this range is reserved inside GuC.
+ *
+ * Return: A drm_i915_gem_object if successful, otherwise NULL.
+ */
+static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev,
+   u32 size)
+{
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_i915_gem_object *obj;
+
+   obj = i915_gem_alloc_object(dev, size);
+   if (!obj)
+   return NULL;
+
+   if (i915_gem_object_get_pages(obj)) {
+   drm_gem_object_unreference(obj-base);
+   return NULL;
+   }
+
+   if (i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
+   PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
+   drm_gem_object_unreference(obj-base);
+   return NULL;
+   }
+
+   /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
+   I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
+   return obj;
+}
+
+/**
+ * gem_release_guc_obj() - Release gem object allocated for GuC usage
+ * @obj:   gem obj to be released
+  */
+static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
+{
+   if (!obj)
+   return;
+
+   if (i915_gem_obj_is_pinned(obj))
+   i915_gem_object_ggtt_unpin(obj);
+
+   drm_gem_object_unreference(obj-base);
+}
+
+/*
+ * 

[Intel-gfx] [PATCH 02/10 v5] drm/i915: Debugfs interface to read GuC load status

2015-07-29 Thread Dave Gordon
From: Alex Dai yu@intel.com

The new node provides access to the status of the GuC-specific loader;
also the scratch registers used for communication between the i915
driver and the GuC firmware.

v2:
Changes to output formats per Chris Wilson's suggestions

v5:
Rebased

Issue: VIZ-4884
Signed-off-by: Alex Dai yu@intel.com
Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 39 +
 1 file changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 23a69307..5c8f53c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2372,6 +2372,44 @@ static int i915_llc(struct seq_file *m, void *data)
return 0;
 }
 
+static int i915_guc_load_status_info(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = m-private;
+   struct drm_i915_private *dev_priv = node-minor-dev-dev_private;
+   struct intel_guc_fw *guc_fw = dev_priv-guc.guc_fw;
+   u32 tmp, i;
+
+   if (!HAS_GUC_UCODE(dev_priv-dev))
+   return 0;
+
+   seq_printf(m, GuC firmware status:\n);
+   seq_printf(m, \tpath: %s\n,
+   guc_fw-guc_fw_path);
+   seq_printf(m, \tfetch: %s\n,
+   intel_guc_fw_status_repr(guc_fw-guc_fw_fetch_status));
+   seq_printf(m, \tload: %s\n,
+   intel_guc_fw_status_repr(guc_fw-guc_fw_load_status));
+   seq_printf(m, \tversion wanted: %d.%d\n,
+   guc_fw-guc_fw_major_wanted, guc_fw-guc_fw_minor_wanted);
+   seq_printf(m, \tversion found: %d.%d\n,
+   guc_fw-guc_fw_major_found, guc_fw-guc_fw_minor_found);
+
+   tmp = I915_READ(GUC_STATUS);
+
+   seq_printf(m, \nGuC status 0x%08x:\n, tmp);
+   seq_printf(m, \tBootrom status = 0x%x\n,
+   (tmp  GS_BOOTROM_MASK)  GS_BOOTROM_SHIFT);
+   seq_printf(m, \tuKernel status = 0x%x\n,
+   (tmp  GS_UKERNEL_MASK)  GS_UKERNEL_SHIFT);
+   seq_printf(m, \tMIA Core status = 0x%x\n,
+   (tmp  GS_MIA_MASK)  GS_MIA_SHIFT);
+   seq_puts(m, \nScratch registers:\n);
+   for (i = 0; i  16; i++)
+   seq_printf(m, \t%2d: \t0x%x\n, i, I915_READ(SOFT_SCRATCH(i)));
+
+   return 0;
+}
+
 static int i915_edp_psr_status(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m-private;
@@ -5065,6 +5103,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{i915_gem_hws_bsd, i915_hws_info, 0, (void *)VCS},
{i915_gem_hws_vebox, i915_hws_info, 0, (void *)VECS},
{i915_gem_batch_pool, i915_gem_batch_pool_info, 0},
+   {i915_guc_load_status, i915_guc_load_status_info, 0},
{i915_frequency_info, i915_frequency_info, 0},
{i915_hangcheck_info, i915_hangcheck_info, 0},
{i915_drpc_info, i915_drpc_info, 0},
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 00/10 v5] Batch submission via GuC

2015-07-29 Thread Dave Gordon
This patch series enables command submission via the GuC. In this mode,
instead of the host CPU driving the execlist port directly, it hands
over work items to the GuC, using a doorbell mechanism to tell the GuC
that new items have been added to its work queue. The GuC then dispatches
contexts to the various GPU engines, and manages the resulting context-
switch interrupts. Completion of a batch is however still signalled to
the CPU; the GuC is not involved in handling user interrupts.

There are two subsequences within the patch series:

  drm/i915: GuC-specific firmware loader
  drm/i915: Debugfs interface to read GuC load status

These two patches provide the GuC loader and a debugfs interface to
verify the resulting state.  At this point in the sequence we can load
and activate the GuC firmware, but not submit any batches through it.
(This is nonetheless a potentially useful state, as the GuC could do
other useful work even when not handling batch submissions).

  drm/i915: Expose one LRC function for GuC submission mode
  drm/i915: Prepare for GuC-based command submission
  drm/i915: Enable GuC firmware log
  drm/i915: Implementation of GuC submission client
  drm/i915: Interrupt routing for GuC submission
  drm/i915: Integrate GuC-based command submission
  drm/i915: Debugfs interface for GuC submission statistics
  drm/i915: Enable GuC submission, where supported

In this second section, we implement the GuC submission mechanism, link
it into the (execlist-based) submission path, and finally enable it
(on supported platforms).

On platforms where there is no GuC, or if GuC submission is explicitly
disabled, batch submission will revert to using the execlist mechanism
directly.  On the other hand, if the GuC firmware cannot be found or is
invalid, the GPU will be unusable.

The GuC firmware itself is not included in this patchset; it is or will
be available for download from https://01.org/linuxgraphics/downloads/
This driver works with and requires GuC firmware revision 3.x. It will
not work with any firmware version 1.x, as the GuC protocol in those
revisions was incompatible and is no longer supported.

Ben Widawsky (0):
Vinit Azad (0):
Michael H. Nguyen (0):
  created the original versions on which some of these patches are based.

Alex Dai (5):
  drm/i915: GuC-specific firmware loader
  drm/i915: Debugfs interface to read GuC load status
  drm/i915: Prepare for GuC-based command submission
  drm/i915: Enable GuC firmware log
  drm/i915: Integrate GuC-based command submission

Dave Gordon (5):
  drm/i915: Expose one LRC function for GuC submission mode
  drm/i915: Implementation of GuC submission client
  drm/i915: Interrupt routing for GuC submission
  drm/i915: Debugfs interface for GuC submission statistics
  drm/i915: Enable GuC submission, where supported

 Documentation/DocBook/drm.tmpl |  14 +
 drivers/gpu/drm/i915/Makefile  |   4 +
 drivers/gpu/drm/i915/i915_debugfs.c| 122 +++-
 drivers/gpu/drm/i915/i915_dma.c|   9 +
 drivers/gpu/drm/i915/i915_drv.h|  11 +
 drivers/gpu/drm/i915/i915_gem.c|  16 +
 drivers/gpu/drm/i915/i915_gpu_error.c  |  13 +-
 drivers/gpu/drm/i915/i915_guc_reg.h|  17 +-
 drivers/gpu/drm/i915/i915_guc_submission.c | 887 +
 drivers/gpu/drm/i915/i915_params.c |   4 +-
 drivers/gpu/drm/i915/i915_reg.h|  15 +-
 drivers/gpu/drm/i915/intel_guc.h   | 122 
 drivers/gpu/drm/i915/intel_guc_fwif.h  |   3 +-
 drivers/gpu/drm/i915/intel_guc_loader.c| 613 
 drivers/gpu/drm/i915/intel_lrc.c   |  65 ++-
 drivers/gpu/drm/i915/intel_lrc.h   |   8 +
 16 files changed, 1881 insertions(+), 42 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_guc_submission.c
 create mode 100644 drivers/gpu/drm/i915/intel_guc.h
 create mode 100644 drivers/gpu/drm/i915/intel_guc_loader.c

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915/skl: Don't expose the top most plane on gen9 display

2015-07-29 Thread Konduru, Chandra


 -Original Message-
 From: Intel-gfx [mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf Of
 Damien Lespiau
 Sent: Thursday, July 16, 2015 9:08 AM
 To: intel-gfx@lists.freedesktop.org
 Subject: [Intel-gfx] [PATCH 1/2] drm/i915/skl: Don't expose the top most plane
 on gen9 display
 
 on SKL/BXT, the top most plane hardware is shared between the legacy
 cursor registers and an actual plane. Daniel and Ville don't want to
 expose 2 DRM planes and would rather expose a CURSOR plane that has all
 the usual plane properties, and that's a blocker for lifting the
 preliminary_hw_support flag.
 
 Unfortunately no one has had the time to finish this yet, but lifting the
 preliminary_hw_support flag is long overdue. As an intermediate solution
 we can merely not expose the top most plane
 
 Cc: Imre Deak imre.d...@intel.com
 Signed-off-by: Damien Lespiau damien.lesp...@intel.com
 ---
  drivers/gpu/drm/i915/i915_dma.c | 16 
  1 file changed, 12 insertions(+), 4 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_dma.c
 b/drivers/gpu/drm/i915/i915_dma.c
 index 91925cb..96564cd 100644
 --- a/drivers/gpu/drm/i915/i915_dma.c
 +++ b/drivers/gpu/drm/i915/i915_dma.c
 @@ -726,11 +726,19 @@ static void intel_device_info_runtime_init(struct
 drm_device *dev)
 
   info = (struct intel_device_info *)dev_priv-info;
 
 + /*
 +  * Skylake and Broxton currently don't expose the topmost plane as its
 +  * use is exclusive with the legacy cursor and we only want to expose
 +  * one of those, not both. Until we can safely expose the topmost plane
 +  * as a DRM_PLANE_TYPE_CURSOR with all the features
 exposed/supported,
 +  * we don't expose the topmost plane at all to prevent ABI breakage
 +  * down the line.
 +  */
   if (IS_BROXTON(dev)) {
 - info-num_sprites[PIPE_A] = 3;
 - info-num_sprites[PIPE_B] = 3;
 - info-num_sprites[PIPE_C] = 2;
 - } else if (IS_VALLEYVIEW(dev) || INTEL_INFO(dev)-gen == 9)
 + info-num_sprites[PIPE_A] = 2;
 + info-num_sprites[PIPE_B] = 2;
 + info-num_sprites[PIPE_C] = 1;
 + } else if (IS_VALLEYVIEW(dev))
   for_each_pipe(dev_priv, pipe)
   info-num_sprites[pipe] = 2;

For SKL, by default you can keep as 2 planes + 1 cursor, but let 
userland change to 3 planes + no cursor. This is a one-time 
request only and not ping ponging between 2+1 and 3+0.
For Android, this gives flexibility to use 3 planes where 
cursor isn't a dominant case. 

Similarly, for BXT, allow 4+0 and keep 3+1 as default for 
legacy userlands.
 
   else
 --
 2.1.0
 
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Mark PIN_USER binding as GLOBAL_BIND without the aliasing ppgtt

2015-07-29 Thread Chris Wilson
If the device does not support the aliasing ppgtt, we must translate
user bind requests (PIN_USER) from LOCAL_BIND to a GLOBAL_BIND. However,
since this is device specific we cannot do this conveniently in the
upper layers and so must manage the vma-bound flags in the backend.

Partial revert of commit 75d04a3773ecee617847de963ae4195d6aa74c28 [4.2-rc1]
Author: Mika Kuoppala mika.kuopp...@linux.intel.com
Date:   Tue Apr 28 17:56:17 2015 +0300

drm/i915/gtt: Allocate va range only if vma is not bound

Note this was spotted by Daniel originally, but we dropped the ball in
getting the fix in before the bug going wild. Sorry all.

Reported-by: Vincent Legoll vincent.leg...@gmail.com
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91133
References: https://bugs.freedesktop.org/show_bug.cgi?id=90224
Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
Cc: Michel Thierry michel.thie...@intel.com
Cc: Daniel Vetter daniel.vet...@ffwll.ch
Cc: Mika Kuoppala mika.kuopp...@intel.com
Cc: Jani Nikula jani.nik...@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9d3852c521c7..c0d8e1f5b5c2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1928,6 +1928,17 @@ static int ggtt_bind_vma(struct i915_vma *vma,
vma-vm-insert_entries(vma-vm, pages,
vma-node.start,
cache_level, pte_flags);
+
+   /* Note the inconsistency here is due to absence of the
+* aliasing ppgtt on gen4 and earlier. Though we always
+* request PIN_USER for execbuffer (translated to LOCAL_BIND),
+* without the appgtt, we cannot honour that request and so
+* must substitute it with a global binding. Since we do this
+* behind the upper layers back, we need to explicitly set
+* the bound flag ourselves.
+*/
+   vma-bound |= GLOBAL_BIND;
+
}
 
if (dev_priv-mm.aliasing_ppgtt  flags  LOCAL_BIND) {
-- 
2.4.6

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm: Fixup locking WARNINGs in drm_mode_config_reset

2015-07-29 Thread Laurent Pinchart
Hi Daniel,

Thank you for the patch.

On Wednesday 29 July 2015 08:32:43 Daniel Vetter wrote:
 With
 
 commit 7a3f3d6667f5f9ffd1517f6b21d64bbf5312042c
 Author: Daniel Vetter daniel.vet...@ffwll.ch
 Date:   Thu Jul 9 23:44:28 2015 +0200
 
 drm: Check locking in drm_for_each_connector
 
 we started checking the locking in drm_for_each_connector but somehow
 I totally missed drm_mode_config_reset. There's no problem there since
 this function should only be called in single-threaded contexts
 (driver load or resume), so just wrap the loop with the right lock.
 
 v2: Drink coffee and all that ...
 
 Cc: Laurent Pinchart laurent.pinch...@ideasonboard.com
 Reported-by: Laurent Pinchart laurent.pinch...@ideasonboard.com
 Signed-off-by: Daniel Vetter daniel.vet...@intel.com

This gets rid of the warning at driver load time with the rcar-du-drm driver.

Tested-by: Laurent Pinchart laurent.pinch...@ideasonboard.com

 ---
  drivers/gpu/drm/drm_crtc.c | 2 ++
  1 file changed, 2 insertions(+)
 
 diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
 index c91c18b2b1d4..10c1a0f6680c 100644
 --- a/drivers/gpu/drm/drm_crtc.c
 +++ b/drivers/gpu/drm/drm_crtc.c
 @@ -5273,9 +5273,11 @@ void drm_mode_config_reset(struct drm_device *dev)
   if (encoder-funcs-reset)
   encoder-funcs-reset(encoder);
 
 + mutex_lock(dev-mode_config.mutex);
   drm_for_each_connector(connector, dev)
   if (connector-funcs-reset)
   connector-funcs-reset(connector);
 + mutex_unlock(dev-mode_config.mutex);

Wouldn't it make sense to protect the whole function with the mode_config 
mutex, given that it's documented as protecting the KMS related lists and 
structures ?

  }
  EXPORT_SYMBOL(drm_mode_config_reset);

-- 
Regards,

Laurent Pinchart

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 09/10 v5] drm/i915: Debugfs interface for GuC submission statistics

2015-07-29 Thread Dave Gordon
This provides a means of reading status and counts relating
to GuC actions and submissions.

v2:
Remove surplus blank line in output [Chris Wilson]

v5:
Added GuC per-engine submission & seqno statistics

Signed-off-by: Dave Gordon david.s.gor...@intel.com
Signed-off-by: Alex Dai yu@intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c | 52 +
 1 file changed, 52 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index cd40909..9b44fb1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2410,6 +2410,57 @@ static int i915_guc_load_status_info(struct seq_file *m, 
void *data)
return 0;
 }
 
+static int i915_guc_info(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = m-private;
+   struct drm_device *dev = node-minor-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_engine_cs *ring;
+   struct intel_guc guc;
+   struct i915_guc_client client = { .client_obj = 0 };
+   enum intel_ring_id i;
+   u64 total = 0;
+
+   if (!HAS_GUC_SCHED(dev_priv-dev))
+   return 0;
+
+   /* Take a local copy of the GuC data, so we can dump it at leisure */
+   spin_lock(dev_priv-guc.host2guc_lock);
+   guc = dev_priv-guc;
+   if (guc.execbuf_client) {
+   spin_lock(guc.execbuf_client-wq_lock);
+   client = *guc.execbuf_client;
+   spin_unlock(guc.execbuf_client-wq_lock);
+   }
+   spin_unlock(dev_priv-guc.host2guc_lock);
+
+   seq_printf(m, GuC total action count: %llu\n, guc.action_count);
+   seq_printf(m, GuC last action command: 0x%x\n, guc.action_cmd);
+   seq_printf(m, GuC last action status: 0x%x\n, guc.action_status);
+
+   seq_printf(m, GuC action failure count: %u\n, guc.action_fail);
+   seq_printf(m, GuC last action error code: %d\n, guc.action_err);
+
+   seq_printf(m, \nGuC submissions:\n);
+   for_each_ring(ring, dev_priv, i) {
+   seq_printf(m, \t%-24s: %10llu, last seqno 0x%08x %9d\n,
+   ring-name, guc.submissions[i],
+   guc.last_seqno[i], guc.last_seqno[i]);
+   total += guc.submissions[i];
+   }
+   seq_printf(m, \t%s: %llu\n, Total, total);
+
+   seq_printf(m, \nGuC execbuf client @ %p:\n, guc.execbuf_client);
+   seq_printf(m, \tTotal submissions: %llu\n, client.submissions);
+   seq_printf(m, \tFailed to queue: %u\n, client.q_fail);
+   seq_printf(m, \tFailed doorbell: %u\n, client.b_fail);
+   seq_printf(m, \tLast submission result: %d\n, client.retcode);
+
+   /* Add more as required ... */
+
+   return 0;
+}
+
 static int i915_guc_log_dump(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m-private;
@@ -5131,6 +5182,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{i915_gem_hws_bsd, i915_hws_info, 0, (void *)VCS},
{i915_gem_hws_vebox, i915_hws_info, 0, (void *)VECS},
{i915_gem_batch_pool, i915_gem_batch_pool_info, 0},
+   {i915_guc_info, i915_guc_info, 0},
{i915_guc_load_status, i915_guc_load_status_info, 0},
{i915_guc_log_dump, i915_guc_log_dump, 0},
{i915_frequency_info, i915_frequency_info, 0},
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 08/10 v5] drm/i915: Integrate GuC-based command submission

2015-07-29 Thread Dave Gordon
From: Alex Dai yu@intel.com

GuC-based submission is mostly the same as execlist mode, up to
intel_logical_ring_advance_and_submit(), where the context being
dispatched would be added to the execlist queue; at this point
we submit the context to the GuC backend instead.

There are, however, a few other changes also required, notably:
1.  Contexts must be pinned at GGTT addresses accessible by the GuC
i.e. NOT in the range [0..WOPCM_SIZE), so we have to add the
PIN_OFFSET_BIAS flag to the relevant GGTT-pinning calls.

2.  The GuC's TLB must be invalidated after a context is pinned at
a new GGTT address.

3.  GuC firmware uses the one page before Ring Context as shared data.
Therefore, whenever driver wants to get base address of LRC, we
will offset one page for it. LRC_PPHWSP_PN is defined as the page
number of LRCA.

4.  In the work queue used to pass requests to the GuC, the GuC
firmware requires the ring-tail-offset to be represented as an
11-bit value, expressed in QWords. Therefore, the ringbuffer
size must be reduced to the representable range (4 pages).

v2:
Defer adding #defines until needed [Chris Wilson]
Rationalise type declarations [Chris Wilson]

v4:
Squashed kerneldoc patch into here [Daniel Vetter]

v5:
Update request-tail in code common to both GuC and execlist modes.
Add a private version of lr_context_update(), as sharing the
execlist version leads to race conditions when the CPU and
the GuC both update TAIL in the context image.
Conversion of error-captured HWS page to string must account
for offset from start of object to actual HWS (LRC_PPHWSP_PN).

Issue: VIZ-4884
Signed-off-by: Alex Dai yu@intel.com
Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 Documentation/DocBook/drm.tmpl | 14 ++
 drivers/gpu/drm/i915/i915_debugfs.c|  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  | 13 ++---
 drivers/gpu/drm/i915/i915_guc_submission.c | 79 --
 drivers/gpu/drm/i915/intel_guc.h   |  1 +
 drivers/gpu/drm/i915/intel_lrc.c   | 55 ++---
 drivers/gpu/drm/i915/intel_lrc.h   |  6 +++
 7 files changed, 142 insertions(+), 28 deletions(-)

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 4111902..a01fca9 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -4152,6 +4152,20 @@ int num_ioctls;/synopsis
   /sect2
 /sect1
 sect1
+  titleGuC-based Command Submission/title
+  sect2
+titleGuC/title
+!Pdrivers/gpu/drm/i915/intel_guc_loader.c GuC-specific firmware loader
+!Idrivers/gpu/drm/i915/intel_guc_loader.c
+  /sect2
+  sect2
+titleGuC Client/title
+!Pdrivers/gpu/drm/i915/intel_guc_submission.c GuC-based command submissison
+!Idrivers/gpu/drm/i915/intel_guc_submission.c
+  /sect2
+/sect1
+
+sect1
   title Tracing /title
   para
 This sections covers all things related to the tracepoints implemented in
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index bc12c70..cd40909 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1995,7 +1995,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
return;
}
 
-   page = i915_gem_object_get_page(ctx_obj, 1);
+   page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
if (!WARN_ON(page == NULL)) {
reg_state = kmap_atomic(page);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6f42569..00ca5ae 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -458,21 +458,22 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
}
 
if ((obj = error-ring[i].hws_page)) {
-   err_printf(m, %s --- HW Status = 0x%08x\n,
+   err_printf(m, %s --- HW Status = 0x%08lx\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  obj-gtt_offset + LRC_PPHWSP_PN * PAGE_SIZE);
offset = 0;
for (elt = 0; elt  PAGE_SIZE/16; elt += 4) {
err_printf(m, [%04x] %08x %08x %08x %08x\n,
   offset,
-  obj-pages[0][elt],
-  obj-pages[0][elt+1],
-  obj-pages[0][elt+2],
-  obj-pages[0][elt+3]);
+  obj-pages[LRC_PPHWSP_PN][elt],
+  obj-pages[LRC_PPHWSP_PN][elt+1],
+  obj-pages[LRC_PPHWSP_PN][elt+2],
+ 

[Intel-gfx] [PATCH 01/10 v5] drm/i915: GuC-specific firmware loader

2015-07-29 Thread Dave Gordon
From: Alex Dai yu@intel.com

This fetches the required firmware image from the filesystem,
then loads it into the GuC's memory via a dedicated DMA engine.

This patch is derived from GuC loading work originally done by
Vinit Azad and Ben Widawsky.

v2:
Various improvements per review comments by Chris Wilson

v3:
Removed 'wait' parameter to intel_guc_ucode_load() as firmware
prefetch is no longer supported in the common firmware loader,
per Daniel Vetter's request.
Firmware checker callback fn now returns errno rather than bool.

v4:
Squash uC-independent code into GuC-specific loader [Daniel Vetter]
Don't keep the driver working (by falling back to execlist mode)
if GuC firmware loading fails [Daniel Vetter]

v5:
Clarify WOPCM-related #defines [Tom O'Rourke]
Delete obsolete code no longer required with current h/w & f/w
[Tom O'Rourke]
Move the call to intel_guc_ucode_init() later, so that it can
allocate GEM objects, and have it fetch the firmware; then
intel_guc_ucode_load() doesn't need to fetch it later.
[Daniel Vetter].

Issue: VIZ-4884
Signed-off-by: Alex Dai yu@intel.com
Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 drivers/gpu/drm/i915/Makefile   |   3 +
 drivers/gpu/drm/i915/i915_dma.c |   9 +
 drivers/gpu/drm/i915/i915_drv.h |  11 +
 drivers/gpu/drm/i915/i915_gem.c |  16 +
 drivers/gpu/drm/i915/i915_guc_reg.h |  17 +-
 drivers/gpu/drm/i915/i915_reg.h |   4 +-
 drivers/gpu/drm/i915/intel_guc.h|  67 
 drivers/gpu/drm/i915/intel_guc_fwif.h   |   3 +-
 drivers/gpu/drm/i915/intel_guc_loader.c | 531 
 9 files changed, 652 insertions(+), 9 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_guc.h
 create mode 100644 drivers/gpu/drm/i915/intel_guc_loader.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 41fb8a9..cc359e0 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -40,6 +40,9 @@ i915-y += i915_cmd_parser.o \
  intel_ringbuffer.o \
  intel_uncore.o
 
+# general-purpose microcontroller (GuC) support
+i915-y += intel_guc_loader.o
+
 # autogenerated null render state
 i915-y += intel_renderstate_gen6.o \
  intel_renderstate_gen7.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index ab37d11..2193cc2 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -435,6 +435,11 @@ static int i915_load_modeset_init(struct drm_device *dev)
 * working irqs for e.g. gmbus and dp aux transfers. */
intel_modeset_init(dev);
 
+   /* intel_guc_ucode_init() needs the mutex to allocate GEM objects */
+   mutex_lock(dev-struct_mutex);
+   intel_guc_ucode_init(dev);
+   mutex_unlock(dev-struct_mutex);
+
ret = i915_gem_init(dev);
if (ret)
goto cleanup_irq;
@@ -476,6 +481,9 @@ cleanup_gem:
i915_gem_context_fini(dev);
mutex_unlock(dev-struct_mutex);
 cleanup_irq:
+   mutex_lock(dev-struct_mutex);
+   intel_guc_ucode_fini(dev);
+   mutex_unlock(dev-struct_mutex);
drm_irq_uninstall(dev);
 cleanup_gem_stolen:
i915_gem_cleanup_stolen(dev);
@@ -1128,6 +1136,7 @@ int i915_driver_unload(struct drm_device *dev)
flush_workqueue(dev_priv-wq);
 
mutex_lock(dev-struct_mutex);
+   intel_guc_ucode_fini(dev);
i915_gem_cleanup_ringbuffer(dev);
i915_gem_context_fini(dev);
mutex_unlock(dev-struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 04aa34a..2c539df 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -50,6 +50,7 @@
 #include linux/intel-iommu.h
 #include linux/kref.h
 #include linux/pm_qos.h
+#include intel_guc.h
 
 /* General customization:
  */
@@ -1697,6 +1698,8 @@ struct drm_i915_private {
 
struct i915_virtual_gpu vgpu;
 
+   struct intel_guc guc;
+
struct intel_csr csr;
 
/* Display CSR-related protection */
@@ -1941,6 +1944,11 @@ static inline struct drm_i915_private 
*dev_to_i915(struct device *dev)
return to_i915(dev_get_drvdata(dev));
 }
 
+static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
+{
+   return container_of(guc, struct drm_i915_private, guc);
+}
+
 /* Iterate over initialised rings */
 #define for_each_ring(ring__, dev_priv__, i__) \
for ((i__) = 0; (i__)  I915_NUM_RINGS; (i__)++) \
@@ -2544,6 +2552,9 @@ struct drm_i915_cmd_table {
 
 #define HAS_CSR(dev)   (IS_SKYLAKE(dev))
 
+#define HAS_GUC_UCODE(dev) (IS_GEN9(dev))
+#define HAS_GUC_SCHED(dev) (IS_GEN9(dev))
+
 #define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \
INTEL_INFO(dev)-gen = 8)
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

[Intel-gfx] [PATCH 03/10 v5] drm/i915: Expose one LRC function for GuC submission mode

2015-07-29 Thread Dave Gordon
GuC submission is basically execlist submission, but with the GuC
handling the actual writes to the ELSP and the resulting context
switch interrupts.  So to describe a context for submission via
the GuC, we need one of the same functions used in execlist mode.
This commit exposes one such function, changing its name to better
describe what it does (it's related to logical ring contexts rather
than to execlists per se).

v2:
Replaces previous drm/i915: Move execlists defines from .c to .h

v3:
Incorporates a change to one of the functions exposed here that was
previously part of an internal patch, but which was omitted from
the version recently committed to drm-intel-nightly:
7a01a0a drm/i915/lrc: Update PDPx registers with lri commands
So we reinstate this change here.

v4:
Drop v3 change, update function parameters due to collision with
8ee3615 drm/i915: Convert execlists_ctx_descriptor() for requests

v5:
Don't expose execlists_update_context() after all. The current
version is no longer compatible with GuC submission; trying to
share the execlist version of this function results in both GuC
and CPU updating TAIL in the context image, with bad results when
they get out of step. The GuC submission path now has its own
private version that just updates the ringbuffer start address,
and not TAIL or PDPx.

Issue: VIZ-4884
Signed-off-by: Dave Gordon david.s.gor...@intel.com
---
 drivers/gpu/drm/i915/intel_lrc.c | 10 +-
 drivers/gpu/drm/i915/intel_lrc.h |  2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 99bba8e..309d088 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -262,11 +262,11 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object 
*ctx_obj)
return lrca  12;
 }
 
-static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq)
+uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
+struct intel_engine_cs *ring)
 {
-   struct intel_engine_cs *ring = rq-ring;
struct drm_device *dev = ring-dev;
-   struct drm_i915_gem_object *ctx_obj = rq-ctx-engine[ring-id].state;
+   struct drm_i915_gem_object *ctx_obj = ctx-engine[ring-id].state;
uint64_t desc;
uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
 
@@ -304,13 +304,13 @@ static void execlists_elsp_write(struct 
drm_i915_gem_request *rq0,
uint64_t desc[2];
 
if (rq1) {
-   desc[1] = execlists_ctx_descriptor(rq1);
+   desc[1] = intel_lr_context_descriptor(rq1-ctx, rq1-ring);
rq1-elsp_submitted++;
} else {
desc[1] = 0;
}
 
-   desc[0] = execlists_ctx_descriptor(rq0);
+   desc[0] = intel_lr_context_descriptor(rq0-ctx, rq0-ring);
rq0-elsp_submitted++;
 
/* You must always write both descriptors in the order below. */
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 64f89f99..5e5788c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -74,6 +74,8 @@ int intel_lr_context_deferred_create(struct intel_context 
*ctx,
 void intel_lr_context_unpin(struct drm_i915_gem_request *req);
 void intel_lr_context_reset(struct drm_device *dev,
struct intel_context *ctx);
+uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
+struct intel_engine_cs *ring);
 
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int 
enable_execlists);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [REGRESSION] Re: i915 driver crashes on T540p if docking station attached

2015-07-29 Thread Theodore Ts'o
On Wed, Jul 29, 2015 at 08:49:37PM -0400, Theodore Ts'o wrote:
 
 Unfortunately the failure causes a series of recursive faults and I
 haven't been able to capture the stack trace, but on 4.2-rcX kernels,
 I can reliably cause the system to crash if my T540p is booted with
 the docking station attached.
 
 It will also crash if I boot the system first, and then insert the
 laptop into the dockstation.
 
 Unfortunately, I can't get a stack trace because there are a huge
 number of recursive/double faults, and the system dies so quickly that
 nothing ends up in the log files.  If you really need a stack dump I
 can try to rig something, but modern Laptops don't have serial
 consoles any more, alas, so it's a bit of a pain.

The bad news is that I tried to use kdump to capture a crashdump and
hopefully get more information, and kdump utterly wedged on the panic.
The good news is because it wedged the system, I was able to get the
console stackdump before it scrolled off due to a whole series of
recursive oops messages.

It's here:  https://goo.gl/photos/xHjn2Z97JQEw6k2C9

Hopefully this is useful.  It's not obvious how to revert this change,
since there were a large number of changes to i915 after this.  If
someone could help me with a revert, I'd be happy to test it.

Thanks,

- Ted



 
 I was able to bisect it down to this commit, however: 8c7b5ccb72987:
 drm/i915: Use atomic helpers for computing changed flags:
 
 Is there any chance Intel could add a Lenovo Dockstation with a
 Multistream DP output to part of your test hardware?  Unfortunately it
 seems pretty common that I see regressions with my particular
 hardware.  Maybe there aren't enough people using Thinkpads any more?  :-(
 
 - Ted
 
 
 P.S.  The git bisect log
 
 git bisect start
 # bad: [421d125c06c4be4c5005cb69840206bd09b71dd6] builddeb: sign the modules 
 after splitting out the debuginfo files
 git bisect bad 421d125c06c4be4c5005cb69840206bd09b71dd6
 # good: [b953c0d234bc72e8489d3bf51a276c5c4ec85345] Linux 4.1
 git bisect good b953c0d234bc72e8489d3bf51a276c5c4ec85345
 # good: [aeaa2122af4e53f3bfd28e8f294557bb95af43fc] drm/i915/skl: Add the INIT 
 power domain to the MISC I/O power well
 git bisect good aeaa2122af4e53f3bfd28e8f294557bb95af43fc
 # bad: [4d70f38a760ad2879d2ebd84001c92980180f630] drm/i915/bios: remove a 
 redundant NULL pointer check
 git bisect bad 4d70f38a760ad2879d2ebd84001c92980180f630
 # bad: [27a1b688d9f1fa2abd14bfe6a8729a19fb3b1b25] drm/i915/bxt: Enable 
 WaEnableYV12BugFixInHalfSliceChicken7 for Broxton
 git bisect bad 27a1b688d9f1fa2abd14bfe6a8729a19fb3b1b25
 # good: [4be0731786de10d0e9ae1d159504c83c6b052647] drm/i915: Add crtc states 
 before calling compute_config()
 git bisect good 4be0731786de10d0e9ae1d159504c83c6b052647
 # good: [d5432a9d19b61ba6a2b3d88f3026e0ca60eb57a1] drm/i915: Stage new 
 modeset state straight into atomic state
 git bisect good d5432a9d19b61ba6a2b3d88f3026e0ca60eb57a1
 # bad: [a821fc46bc7bb6d4cf9a5f8d2787fd70231c2c10] drm/i915: Swap atomic state 
 in legacy modeset
 git bisect bad a821fc46bc7bb6d4cf9a5f8d2787fd70231c2c10
 # bad: [8c7b5ccb729870e606321b3703e2c2e698c49a95] drm/i915: Use atomic 
 helpers for computing changed flags
 git bisect bad 8c7b5ccb729870e606321b3703e2c2e698c49a95
 # good: [0f63cca2afdc38877e86acfa9821020f6e2213fd] drm/i915: Update crtc 
 state active flag based on DPMS
 git bisect good 0f63cca2afdc38877e86acfa9821020f6e2213fd
 # good: [840bfe953384a134c8639f2964d9b74bfa671e16] drm/atomic: Make 
 mode_fixup() optional for check_modeset()
 git bisect good 840bfe953384a134c8639f2964d9b74bfa671e16
 # first bad commit: [8c7b5ccb729870e606321b3703e2c2e698c49a95] drm/i915: Use 
 atomic helpers for computing changed flags
 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] linux-next: manual merge of the drm-misc tree with Linus' tree

2015-07-29 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the drm-misc tree got a conflict in:

  drivers/gpu/drm/drm_crtc.c

between commit:

  5677d67ae394 (drm: Stop resetting connector state to unknown)

from Linus' tree and commit:

  1c473be11958 (drm: Fixup locking WARNINGs in drm_mode_config_reset)

from the drm-misc tree.

I fixed it up (see below) and can carry the fix as necessary (no action
is required).

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

diff --cc drivers/gpu/drm/drm_crtc.c
index c91c18b2b1d4,7d02e32b4e94..
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@@ -5273,9 -5273,14 +5273,11 @@@ void drm_mode_config_reset(struct drm_d
if (encoder-funcs-reset)
encoder-funcs-reset(encoder);
  
+   mutex_lock(dev-mode_config.mutex);
 -  drm_for_each_connector(connector, dev) {
 -  connector-status = connector_status_unknown;
 -
 +  drm_for_each_connector(connector, dev)
if (connector-funcs-reset)
connector-funcs-reset(connector);
 -  }
+   mutex_unlock(dev-mode_config.mutex);
  }
  EXPORT_SYMBOL(drm_mode_config_reset);
  
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 05/19] drm/i915/gen8: Add dynamic page trace events

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

The dynamic page allocation patch series added it for GEN6, this patch
adds them for GEN8.

v2: Consolidate pagetable/page_directory events
v3: Multiple rebases.
v4: Rebase after s/page_tables/page_table/.
v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
v6: Rebase after gen8_map_pagetable_range removal.
v7: Use generic page name (px) in DECLARE_EVENT_CLASS (Akash)
v8: Defer define of i915_page_directory_pointer_entry_alloc (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v3+)
---
  drivers/gpu/drm/i915/i915_gem_gtt.c |  6 ++
  drivers/gpu/drm/i915/i915_trace.h   | 24 
  2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f338a13..8c1db92 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -844,6 +844,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct 
i915_address_space *vm,
gen8_initialize_pt(vm, pt);
pd-page_table[pde] = pt;
__set_bit(pde, new_pts);
+   trace_i915_page_table_entry_alloc(vm, pde, start, 
GEN8_PDE_SHIFT);
}

return 0;
@@ -904,6 +905,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space 
*vm,
gen8_initialize_pd(vm, pd);
pdp-page_directory[pdpe] = pd;
__set_bit(pdpe, new_pds);
+   trace_i915_page_directory_entry_alloc(vm, pdpe, start, 
GEN8_PDPE_SHIFT);
}

return 0;
@@ -1053,6 +1055,10 @@ static int gen8_alloc_va_range(struct i915_address_space 
*vm,
/* Map the PDE to the page table */
page_directory[pde] = gen8_pde_encode(px_dma(pt),
  I915_CACHE_LLC);
+   trace_i915_page_table_entry_map(ppgtt-base, pde, pt,
+   gen8_pte_index(start),
+   gen8_pte_count(start, 
length),
+   GEN8_PTES);

/* NB: We haven't yet mapped ptes to pages. At this
 * point we're still relying on insert_entries() */
diff --git a/drivers/gpu/drm/i915/i915_trace.h 
b/drivers/gpu/drm/i915/i915_trace.h
index 2f34c47..f230d76 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -186,33 +186,41 @@ DEFINE_EVENT(i915_va, i915_va_alloc,
 TP_ARGS(vm, start, length, name)
  );

-DECLARE_EVENT_CLASS(i915_page_table_entry,
-   TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 
pde_shift),
-   TP_ARGS(vm, pde, start, pde_shift),
+DECLARE_EVENT_CLASS(i915_px_entry,
+   TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 
px_shift),
+   TP_ARGS(vm, px, start, px_shift),

TP_STRUCT__entry(
__field(struct i915_address_space *, vm)
-   __field(u32, pde)
+   __field(u32, px)
__field(u64, start)
__field(u64, end)
),

TP_fast_assign(
__entry-vm = vm;
-   __entry-pde = pde;
+   __entry-px = px;
__entry-start = start;
-   __entry-end = ((start + (1ULL  pde_shift))  ~((1ULL  
pde_shift)-1)) - 1;
+   __entry-end = ((start + (1ULL  px_shift))  ~((1ULL  
px_shift)-1)) - 1;
),

TP_printk(vm=%p, pde=%d (0x%llx-0x%llx),
- __entry-vm, __entry-pde, __entry-start, __entry-end)
+ __entry-vm, __entry-px, __entry-start, __entry-end)
  );

-DEFINE_EVENT(i915_page_table_entry, i915_page_table_entry_alloc,
+DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc,
 TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 
pde_shift),
 TP_ARGS(vm, pde, start, pde_shift)
  );

+DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc,
+  TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, 
u64 pdpe_shift),
+  TP_ARGS(vm, pdpe, start, pdpe_shift),
+
+  TP_printk(vm=%p, pdpe=%d (0x%llx-0x%llx),
+__entry-vm, __entry-px, __entry-start, 
__entry-end)
+);
+
  /* Avoid extra math because we only support two sizes. The format is defined 
by
   * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */
  #define TRACE_PT_SIZE(bits) \


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 06/19] drm/i915/gen8: Add PML4 structure

2015-07-29 Thread Goel, Akash



On 7/29/2015 9:53 PM, Michel Thierry wrote:

Introduces the Page Map Level 4 (PML4), ie. the new top level structure
of the page tables.

To facilitate testing, 48b mode will be available on Broadwell and
GEN9+, when i915.enable_ppgtt = 3.

v2: Remove unnecessary CONFIG_X86_64 checks, ppgtt code is already
32/64-bit safe (Chris).
v3: Add goto free_scratch in temp 48-bit mode init code (Akash).

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_drv.h |  3 ++-
  drivers/gpu/drm/i915/i915_gem_gtt.c | 38 -
  drivers/gpu/drm/i915/i915_gem_gtt.h | 26 -
  3 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 40fea41..0b5cbe8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2498,7 +2498,8 @@ struct drm_i915_cmd_table {
  #define HAS_HW_CONTEXTS(dev)  (INTEL_INFO(dev)-gen = 6)
  #define HAS_LOGICAL_RING_CONTEXTS(dev)(INTEL_INFO(dev)-gen = 8)
  #define USES_PPGTT(dev)   (i915.enable_ppgtt)
-#define USES_FULL_PPGTT(dev)   (i915.enable_ppgtt == 2)
+#define USES_FULL_PPGTT(dev)   (i915.enable_ppgtt = 2)
+#define USES_FULL_48BIT_PPGTT(dev) (i915.enable_ppgtt == 3)

  #define HAS_OVERLAY(dev)  (INTEL_INFO(dev)-has_overlay)
  #define OVERLAY_NEEDS_PHYSICAL(dev)   
(INTEL_INFO(dev)-overlay_needs_physical)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8c1db92..1a120a4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -104,9 +104,12 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)
  {
bool has_aliasing_ppgtt;
bool has_full_ppgtt;
+   bool has_full_64bit_ppgtt;

has_aliasing_ppgtt = INTEL_INFO(dev)-gen = 6;
has_full_ppgtt = INTEL_INFO(dev)-gen = 7;
+   has_full_64bit_ppgtt = (IS_BROADWELL(dev) ||
+   INTEL_INFO(dev)-gen = 9)  false; /* FIXME: 
64b */

if (intel_vgpu_active(dev))
has_full_ppgtt = false; /* emulation is too hard */
@@ -125,6 +128,9 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)
if (enable_ppgtt == 2  has_full_ppgtt)
return 2;

+   if (enable_ppgtt == 3  has_full_64bit_ppgtt)
+   return 3;
+
  #ifdef CONFIG_INTEL_IOMMU
/* Disable ppgtt on SNB if VT-d is on. */
if (INTEL_INFO(dev)-gen == 6  intel_iommu_gfx_mapped) {
@@ -557,6 +563,8 @@ static void free_pdp(struct drm_device *dev,
 struct i915_page_directory_pointer *pdp)
  {
__pdp_fini(pdp);
+   if (USES_FULL_48BIT_PPGTT(dev))
+   kfree(pdp);


Sorry for the late comment.
This change is a bit of a distraction here; it should be moved to the 
following 'alloc/free for 4lvl' patch.


Best regards
Akash


  }

  /* Broadwell Page Directory Pointer Descriptors */
@@ -686,9 +694,6 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
pt_vaddr = NULL;

for_each_sg_page(pages-sgl, sg_iter, pages-nents, 0) {
-   if (WARN_ON(pdpe = GEN8_LEGACY_PDPES))
-   break;
-
if (pt_vaddr == NULL) {
struct i915_page_directory *pd = 
pdp-page_directory[pdpe];
struct i915_page_table *pt = pd-page_table[pde];
@@ -1102,14 +1107,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
return ret;

ppgtt-base.start = 0;
-   ppgtt-base.total = 1ULL  32;
-   if (IS_ENABLED(CONFIG_X86_32))
-   /* While we have a proliferation of size_t variables
-* we cannot represent the full ppgtt size on 32bit,
-* so limit it to the same size as the GGTT (currently
-* 2GiB).
-*/
-   ppgtt-base.total = to_i915(ppgtt-base.dev)-gtt.base.total;
ppgtt-base.cleanup = gen8_ppgtt_cleanup;
ppgtt-base.allocate_va_range = gen8_alloc_va_range;
ppgtt-base.insert_entries = gen8_ppgtt_insert_entries;
@@ -1119,10 +1116,25 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)

ppgtt-switch_mm = gen8_mm_switch;

-   ret = __pdp_init(false, ppgtt-pdp);
+   if (!USES_FULL_48BIT_PPGTT(ppgtt-base.dev)) {
+   ret = __pdp_init(false, ppgtt-pdp);

-   if (ret)
+   if (ret)
+   goto free_scratch;
+
+   ppgtt-base.total = 1ULL  32;
+   if (IS_ENABLED(CONFIG_X86_32))
+   /* While we have a proliferation of size_t variables
+* we cannot represent the full ppgtt size on 32bit,
+* so limit it to the same size as the GGTT (currently
+* 2GiB).
+  

Re: [Intel-gfx] [PATCH 3/3] drm/i915: Support DDI lane reversal for DP

2015-07-29 Thread Sivakumar Thulasimani



On 7/29/2015 8:52 PM, Benjamin Tissoires wrote:

On Jul 29 2015 or thereabouts, Sivakumar Thulasimani wrote:

why not detect reverse in intel_dp_detect/intel_hpd_pulse ? that way you can
identify both lane count and reversal state without touching anything in the
link training code. i am yet to upstream my changes for CHT that i can share
if required that does the same in intel_dp_detect without touching any line
in link training path.

With my current limited knowledge of the dp hotplug (and i915 driver) I
am not sure we could detect the reversed state without trying to train 1
lane only. I'd be glad to look at your changes and test them on my
system if you think that could help having a cleaner solution.

Cheers,
Benjamin
No, what i recommended was to do link training but in intel_dp_detect. 
Since USB Type C cable
also has its own lane count restriction (it can have different lane 
count than the one supported
by panel) you might have to figure that out as well. so both reversal 
and lane count detection
can be done outside the modeset path and keep the code free of type C 
changes outside

detection path.

Please find below the code to do the same. Do not waste time trying to 
apply this directly on
nightly since this is based on a local tree and because this is pre- 
atomic changes code, so you
might have to modify chv_upfront_link_train to work on top of the latest 
nightly code. we

are supposed to upstream this and it is in my todo list.

---

Author: Durgadoss R durgados...@intel.com
Date:   Fri May 22 14:30:07 2015 +0530

   drm/i915: Enable Upfront link training for type-C DP support

To support USB type C alternate DP mode, the display driver needs 
to know the
number of lanes required by DP panel as well as number of lanes 
that can be
supported by the type-C cable. Sometimes, the type-C cable may 
limit the
bandwidth even if Panel can support more lanes. To address these 
scenarios,
the display driver will start link training with max lanes, and if 
the link

training fails, the driver then falls back to x2 lanes.

* Since link training is done before modeset, planes are not 
enabled. Only

  encoder and its associated PLLs are enabled.
* Once link training is done, the encoder and its PLLs are 
disabled; so that

  the subsequent modeset is not aware of these changes.
* As of now, this is tested only on CHV.

Signed-off-by: Durgadoss R durgados...@intel.com

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c

index 0c8ae2a..c72dcaa 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14793,3 +14793,121 @@ intel_display_print_error_state(struct 
drm_i915_error_state_buf *m,

 err_printf(m,   VSYNC: %08x\n, error-transcoder[i].vsync);
 }
 }
+
+bool chv_upfront_link_train(struct drm_device *dev,
+struct intel_dp *intel_dp, struct intel_crtc *crtc)
+{
+struct drm_i915_private *dev_priv = dev-dev_private;
+struct intel_connector *connector = intel_dp-attached_connector;
+struct intel_encoder *encoder = connector-encoder;
+bool found = false;
+bool valid_crtc = false;
+
+if (!connector || !encoder) {
+DRM_DEBUG_KMS(dp connector/encoder is NULL\n);
+return false;
+}
+
+/* If we already have a crtc, start link training directly */
+if (crtc) {
+valid_crtc = true;
+goto start_link_train;
+}
+
+/* Find an unused crtc and use it for link training */
+for_each_intel_crtc(dev, crtc) {
+if (intel_crtc_active(crtc-base))
+continue;
+
+connector-new_encoder = encoder;
+encoder-new_crtc = crtc;
+encoder-base.crtc = crtc-base;
+
+/* Make sure the new crtc will work with the encoder */
+if (drm_encoder_crtc_ok(encoder-base,
+ crtc-base)) {
+found = true;
+break;
+}
+}
+
+if (!found) {
+DRM_ERROR(Could not find crtc for upfront link training\n);
+return false;
+}
+
+start_link_train:
+
+DRM_DEBUG_KMS(upfront link training on pipe:%c\n,
+pipe_name(crtc-pipe));
+found = false;
+
+/* Initialize with Max Link rate  lane count supported by panel */
+intel_dp-link_bw =  intel_dp-dpcd[DP_MAX_LINK_RATE];
+intel_dp-lane_count = intel_dp-dpcd[DP_MAX_LANE_COUNT] 
+DP_MAX_LANE_COUNT_MASK;
+
+do {
+/* Find port clock from link_bw */
+crtc-config.port_clock =
+drm_dp_bw_code_to_link_rate(intel_dp-link_bw);
+
+/* Enable PLL followed by port */
+intel_dp_set_clock(encoder, crtc-config, intel_dp-link_bw);
+chv_update_pll(crtc);
+encoder-pre_pll_enable(encoder);
+chv_enable_pll(crtc);
+

Re: [Intel-gfx] [PATCH v6 08/19] drm/i915/gen8: Add 4 level switching infrastructure and lrc support

2015-07-29 Thread Goel, Akash



On 7/29/2015 9:53 PM, Michel Thierry wrote:

In 64b (48bit canonical) PPGTT addressing, the PDP0 register contains
the base address to PML4, while the other PDP registers are ignored.

In LRC, the addressing mode must be specified in every context
descriptor, and the base address to PML4 is stored in the reg state.

v2: PML4 update in legacy context switch is left for historic reasons,
the preferred mode of operation is with lrc context based submission.
v3: s/gen8_map_page_directory/gen8_setup_page_directory and
s/gen8_map_page_directory_pointer/gen8_setup_page_directory_pointer.
Also, clflush will be needed for bxt. (Akash)
v4: Squashed lrc-specific code and use a macro to set PML4 register.
v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
PDP update in bb_start is only for legacy 32b mode.
v6: Rebase after final merged version of Mika's ppgtt/scratch
patches.
v7: There is no need to update the pml4 register value in
execlists_update_context. (Akash)
v8: Move pd and pdp setup functions to a previous patch, they do not
belong here. (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++
  drivers/gpu/drm/i915/i915_reg.h |  1 +
  drivers/gpu/drm/i915/intel_lrc.c| 60 ++---
  3 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4179b80..c6c8af7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -656,8 +656,8 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
return 0;
  }

-static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct drm_i915_gem_request *req)
+static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
+struct drm_i915_gem_request *req)
  {
int i, ret;

@@ -672,6 +672,12 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
return 0;
  }

+static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
+ struct drm_i915_gem_request *req)
+{
+   return gen8_write_pdp(req, 0, px_dma(ppgtt-pml4));
+}
+
  static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
   struct i915_page_directory_pointer *pdp,
   uint64_t start,
@@ -1318,14 +1324,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt-base.unbind_vma = ppgtt_unbind_vma;
ppgtt-base.bind_vma = ppgtt_bind_vma;

-   ppgtt-switch_mm = gen8_mm_switch;
-
if (USES_FULL_48BIT_PPGTT(ppgtt-base.dev)) {
ret = setup_px(ppgtt-base.dev, ppgtt-pml4);
if (ret)
goto free_scratch;

ppgtt-base.total = 1ULL  48;
+   ppgtt-switch_mm = gen8_48b_mm_switch;
} else {
ret = __pdp_init(false, ppgtt-pdp);
if (ret)
@@ -1340,6 +1345,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 */
ppgtt-base.total = 
to_i915(ppgtt-base.dev)-gtt.base.total;

+   ppgtt-switch_mm = gen8_legacy_mm_switch;
trace_i915_page_directory_pointer_entry_alloc(ppgtt-base,
  0, 0,
  GEN8_PML4E_SHIFT);
@@ -1537,8 +1543,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
int j;

for_each_ring(ring, dev_priv, j) {
+   u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? 
GEN8_GFX_PPGTT_48B : 0;
I915_WRITE(RING_MODE_GEN7(ring),
-  _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+  _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
}
  }

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3a77678..5bd1b6a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1670,6 +1670,7 @@ enum skl_disp_power_wells {
  #define   GFX_REPLAY_MODE (111)
  #define   GFX_PSMI_GRANULARITY(110)
  #define   GFX_PPGTT_ENABLE(19)
+#define   GEN8_GFX_PPGTT_48B   (17)

  #define VLV_DISPLAY_BASE 0x18
  #define VLV_MIPI_BASE VLV_DISPLAY_BASE
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 99bba8e..0b65188 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -196,13 +196,21 @@
reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
  }

+#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
+   reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(ppgtt-pml4)); \
+   reg_state[CTX_PDP0_LDW + 1] = 

Re: [Intel-gfx] drm properties, ABI and compositors

2015-07-29 Thread Dave Airlie
 I've discussed drm props and ABI requirements a bit with Dave on irc.
 In the past we've been pretty lax with properties since connector
 properties are mostly meant for end-users to set manually, so not
 really much point in standardizing and treating them like ABI. But now
 we have props for plane/CRTC and atomic and those are really meant to
 be used by compositors, so all the problems with ABI start to kick in.
 And we had them already, e.g. early i915 patches for rotation where cw
 while existing omap support was ccw. I also just spotted msm patches
 which reinvent the mirror flags of the rotation prop with their own
 flip prop. And there's a lot of things in-progress already like
 zpos/alpha/blending props, color manager/per-plane gamma, ...

 To avoid future ABI disaster I think we should treat these props like
 any other drm ABI and require full-blown userspace, so here that would
 be a real implementation in something like weston, -modesetting, the
 new cros thing or maybe even hwc if that ever happens as an
 open-source project. And test tools like modetest don't cut it since
 upside down desktop is obvious, upside down test pattern meh. And
 modetest doesn't bother with all the TEST_ONLY and failure recovery
 stuff like e.g. weston atomic needs to.

 Internally I think we should also try to standardize prop handling by
 pushing them into drm_*_state structs and adding decoding in core and
 good helpers. And hopefully soon we have markdown for kerneldoc so can
 transform that horrible docbook table into something sane. But that's
 just internals which we can always fix. ABI's forever.

 Anyway this is all kinda just clarification at least for i915. props
 for compositors are ABI like anything else, same rules still apply.

Yes totally, no adding props for closed compositors as well if we
can't use it with weston/mutter/open source stuff we can't test it.

(are there any closed source compositors?)

Dave.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 01/19] drm/i915: Remove unnecessary gen8_clamp_pd

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

gen8_clamp_pd clamps to the next page directory boundary, but the macro
gen8_for_each_pde already has a check to stop at the page directory
boundary.

Furthermore, i915_pte_count also restricts to the next page table
boundary.

v2: Rebase after Mika's ppgtt cleanup / scratch merge patch series.

Suggested-by: Akash Goel akash.g...@intel.com
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_gem_gtt.c |  2 +-
  drivers/gpu/drm/i915/i915_gem_gtt.h | 11 ---
  2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c2a291e..189572d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -955,7 +955,7 @@ static int gen8_alloc_va_range(struct i915_address_space 
*vm,
gen8_for_each_pdpe(pd, ppgtt-pdp, start, length, temp, pdpe) {
gen8_pde_t *const page_directory = kmap_px(pd);
struct i915_page_table *pt;
-   uint64_t pd_len = gen8_clamp_pd(start, length);
+   uint64_t pd_len = length;
uint64_t pd_start = start;
uint32_t pde;

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 
b/drivers/gpu/drm/i915/i915_gem_gtt.h
index e1cfa29..d5bf953 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -444,17 +444,6 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
 temp = min(temp, length),  \
 start += temp, length -= temp)

-/* Clamp length to the next page_directory boundary */
-static inline uint64_t gen8_clamp_pd(uint64_t start, uint64_t length)
-{
-   uint64_t next_pd = ALIGN(start + 1, 1  GEN8_PDPE_SHIFT);
-
-   if (next_pd  (start + length))
-   return length;
-
-   return next_pd - start;
-}
-
  static inline uint32_t gen8_pte_index(uint64_t address)
  {
return i915_pte_index(address, GEN8_PDE_SHIFT);


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 09/19] drm/i915/gen8: Pass sg_iter through pte inserts

2015-07-29 Thread Goel, Akash


Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

As a step towards implementing 4 levels, while not discarding the
existing pte insert functions, we need to pass the sg_iter through.
The current function understands up to the page directory granularity.
An object's pages may span the page directory, and so using the iter
directly as we write the PTEs allows the iterator to stay coherent
through a VMA insert operation spanning multiple page table levels.

v2: Rebase after s/page_tables/page_table/.
v3: Rebase after Mika's ppgtt cleanup / scratch merge patch series;
updated commit message (s/map/insert).

Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ++-
  1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c6c8af7..7c024e98 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -749,7 +749,7 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
  static void
  gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
  struct i915_page_directory_pointer *pdp,
- struct sg_table *pages,
+ struct sg_page_iter *sg_iter,
  uint64_t start,
  enum i915_cache_level cache_level)
  {
@@ -759,11 +759,10 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space 
*vm,
unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
unsigned pte = start  GEN8_PTE_SHIFT  GEN8_PTE_MASK;
-   struct sg_page_iter sg_iter;

pt_vaddr = NULL;

-   for_each_sg_page(pages-sgl, sg_iter, pages-nents, 0) {
+   while (__sg_page_iter_next(sg_iter)) {
if (pt_vaddr == NULL) {
struct i915_page_directory *pd = 
pdp-page_directory[pdpe];
struct i915_page_table *pt = pd-page_table[pde];
@@ -771,7 +770,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
}

pt_vaddr[pte] =
-   gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
+   gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
cache_level, true);
if (++pte == GEN8_PTES) {
kunmap_px(ppgtt, pt_vaddr);
@@ -797,8 +796,10 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
+   struct sg_page_iter sg_iter;

-   gen8_ppgtt_insert_pte_entries(vm, pdp, pages, start, cache_level);
+   __sg_page_iter_start(sg_iter, pages-sgl, sg_nents(pages-sgl), 0);
+   gen8_ppgtt_insert_pte_entries(vm, pdp, sg_iter, start, cache_level);
  }

  static void gen8_free_page_tables(struct drm_device *dev,


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 02/19] drm/i915/gen8: Make pdp allocation more dynamic

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

This transitional patch doesn't do much for the existing code. However,
it should make upcoming patches to use the full 48b address space a bit
easier.

v2: Renamed  pdp_free to be similar to  pd/pt (unmap_and_free_pdp).
v3: To facilitate testing, 48b mode will be available on Broadwell and
GEN9+, when i915.enable_ppgtt = 3.
v4: Rebase after s/page_tables/page_table/, added extra information
about 4-level page table formats and use IS_ENABLED macro.
v5: Check CONFIG_X86_64 instead of CONFIG_64BIT.
v6: Rebase after Mika's ppgtt cleanup / scratch merge patch series, and
follow
his nomenclature in pdp functions (there is no alloc_pdp yet).
v7: Rebase after merged version of Mika's ppgtt cleanup patch series.
v8: Rebase after final merged version of Mika's ppgtt/scratch patches.
v9: Introduce PML4 (and 48-bit checks) until next patch (Akash).
v10: Also use test_bit to detect when pd/pt are already allocated (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 86 +
  drivers/gpu/drm/i915/i915_gem_gtt.h | 17 +---
  2 files changed, 80 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 189572d..28f3227 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -522,6 +522,43 @@ static void gen8_initialize_pd(struct i915_address_space 
*vm,
fill_px(vm-dev, pd, scratch_pde);
  }

+static int __pdp_init(struct drm_device *dev,
+ struct i915_page_directory_pointer *pdp)
+{
+   size_t pdpes = I915_PDPES_PER_PDP(dev);
+
+   pdp-used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+   if (!pdp-used_pdpes)
+   return -ENOMEM;
+
+   pdp-page_directory = kcalloc(pdpes, sizeof(*pdp-page_directory),
+ GFP_KERNEL);
+   if (!pdp-page_directory) {
+   kfree(pdp-used_pdpes);
+   /* the PDP might be the statically allocated top level. Keep it
+* as clean as possible */
+   pdp-used_pdpes = NULL;
+   return -ENOMEM;
+   }
+
+   return 0;
+}
+
+static void __pdp_fini(struct i915_page_directory_pointer *pdp)
+{
+   kfree(pdp-used_pdpes);
+   kfree(pdp-page_directory);
+   pdp-page_directory = NULL;
+}
+
+static void free_pdp(struct drm_device *dev,
+struct i915_page_directory_pointer *pdp)
+{
+   __pdp_fini(pdp);
+}
+
  /* Broadwell Page Directory Pointer Descriptors */
  static int gen8_write_pdp(struct drm_i915_gem_request *req,
  unsigned entry,
@@ -720,7 +757,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
container_of(vm, struct i915_hw_ppgtt, base);
int i;

-   for_each_set_bit(i, ppgtt-pdp.used_pdpes, GEN8_LEGACY_PDPES) {
+   for_each_set_bit(i, ppgtt-pdp.used_pdpes,
+I915_PDPES_PER_PDP(ppgtt-base.dev)) {
if (WARN_ON(!ppgtt-pdp.page_directory[i]))
continue;

@@ -729,6 +767,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space 
*vm)
free_pd(ppgtt-base.dev, ppgtt-pdp.page_directory[i]);
}

+   free_pdp(ppgtt-base.dev, ppgtt-pdp);
gen8_free_scratch(vm);
  }

@@ -763,7 +802,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt 
*ppgtt,

gen8_for_each_pde(pt, pd, start, length, temp, pde) {
/* Don't reallocate page tables */
-   if (pt) {
+   if (test_bit(pde, pd-used_pdes)) {
/* Scratch is never allocated this way */
WARN_ON(pt == ppgtt-base.scratch_pt);
continue;
@@ -820,11 +859,12 @@ static int gen8_ppgtt_alloc_page_directories(struct 
i915_hw_ppgtt *ppgtt,
struct i915_page_directory *pd;
uint64_t temp;
uint32_t pdpe;
+   uint32_t pdpes = I915_PDPES_PER_PDP(dev);

-   WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
+   WARN_ON(!bitmap_empty(new_pds, pdpes));

gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
-   if (pd)
+   if (test_bit(pdpe, pdp-used_pdpes))
continue;

pd = alloc_pd(dev);
@@ -839,18 +879,19 @@ static int gen8_ppgtt_alloc_page_directories(struct 
i915_hw_ppgtt *ppgtt,
return 0;

  unwind_out:
-   for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
+   for_each_set_bit(pdpe, new_pds, pdpes)
free_pd(dev, pdp-page_directory[pdpe]);

return 

Re: [Intel-gfx] [PATCH v6 04/19] drm/i915/gen8: Generalize PTE writing for GEN8 PPGTT

2015-07-29 Thread Goel, Akash


On 7/29/2015 9:53 PM, Michel Thierry wrote:

The insert_entries function was the function used to write PTEs. For the
PPGTT it was hardcoded to only understand two level page tables, which
was the case for GEN7. We can reuse this for 4 level page tables, and
remove the concept of insert_entries, which was never viable past 2
level page tables anyway, but it requires a bit of rework to make the
function a bit more generic.

This patch begins the generalization work, and it will be heavily relied
upon when the 48b code is complete. The patch series attempts to make
each function which touches a part of code specific to the page table
level and here is no exception.

v2: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
v3: Rebase after final merged version of Mika's ppgtt/scratch patches.

Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2)
---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 52 +++--
  1 file changed, 39 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index bd56979..f338a13 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -600,24 +600,21 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
return 0;
  }

-static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
-  uint64_t start,
-  uint64_t length,
-  bool use_scratch)
+static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
+  struct i915_page_directory_pointer *pdp,
+  uint64_t start,
+  uint64_t length,
+  gen8_pte_t scratch_pte)
  {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
-   struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
-   gen8_pte_t *pt_vaddr, scratch_pte;
+   gen8_pte_t *pt_vaddr;
unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
unsigned pte = start  GEN8_PTE_SHIFT  GEN8_PTE_MASK;
unsigned num_entries = length  PAGE_SHIFT;
unsigned last_pte, i;

-   scratch_pte = gen8_pte_encode(px_dma(ppgtt-base.scratch_page),
- I915_CACHE_LLC, use_scratch);
-


Sorry for the late comment.
Would it be better to have a WARN_ON check here for a NULL value of the pdp
pointer, considering the pdp will no longer be static in the 48-bit case?


Actually there are already such checks used in this function for pd, pt 
and page pointers.


Best regards
Akash


while (num_entries) {
struct i915_page_directory *pd;
struct i915_page_table *pt;
@@ -656,14 +653,30 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
}
  }

-static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
- struct sg_table *pages,
- uint64_t start,
- enum i915_cache_level cache_level, u32 
unused)
+static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
+  uint64_t start,
+  uint64_t length,
+  bool use_scratch)
  {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
+
+   gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm-scratch_page),
+I915_CACHE_LLC, use_scratch);
+
+   gen8_ppgtt_clear_pte_range(vm, pdp, start, length, scratch_pte);
+}
+
+static void
+gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ struct sg_table *pages,
+ uint64_t start,
+ enum i915_cache_level cache_level)
+{
+   struct i915_hw_ppgtt *ppgtt =
+   container_of(vm, struct i915_hw_ppgtt, base);
gen8_pte_t *pt_vaddr;
unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
@@ -700,6 +713,19 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
kunmap_px(ppgtt, pt_vaddr);
  }

+static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
+ struct sg_table *pages,
+ uint64_t start,
+ enum i915_cache_level cache_level,
+   

Re: [Intel-gfx] [PATCH v6 11/19] drm/i915/gen8: Initialize PDPs and PML4

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

Similar to PDs, while setting up a page directory pointer, make all entries
of the pdp point to the scratch pd before mapping (and make all its entries
point to the scratch page); this is to be safe in case of out of bound
access or proactive prefetch.

Also add a scratch pdp, which the PML4 entries point to.

v2: Handle scratch_pdp allocation failure correctly, and keep
initialize_px functions together (Akash)
v3: Rebase after Mika's ppgtt cleanup / scratch merge patch series. Rely on
the added macros to initialize the pdps.
v4: Rebase after final merged version of Mika's ppgtt/scratch patches
(and removed commit message part related to v3).
v5: Update commit message to also mention PML4 table initialization and
the new scratch pdp (Akash).

Suggested-by: Akash Goel akash.g...@intel.com
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +
  drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
  2 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7070d42..73cfe56 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -602,6 +602,27 @@ static void free_pdp(struct drm_device *dev,
}
  }

+static void gen8_initialize_pdp(struct i915_address_space *vm,
+   struct i915_page_directory_pointer *pdp)
+{
+   gen8_ppgtt_pdpe_t scratch_pdpe;
+
+   scratch_pdpe = gen8_pdpe_encode(px_dma(vm-scratch_pd), I915_CACHE_LLC);
+
+   fill_px(vm-dev, pdp, scratch_pdpe);
+}
+
+static void gen8_initialize_pml4(struct i915_address_space *vm,
+struct i915_pml4 *pml4)
+{
+   gen8_ppgtt_pml4e_t scratch_pml4e;
+
+   scratch_pml4e = gen8_pml4e_encode(px_dma(vm-scratch_pdp),
+ I915_CACHE_LLC);
+
+   fill_px(vm-dev, pml4, scratch_pml4e);
+}
+
  static void
  gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
  struct i915_page_directory_pointer *pdp,
@@ -863,8 +884,20 @@ static int gen8_init_scratch(struct i915_address_space *vm)
return PTR_ERR(vm-scratch_pd);
}

+   if (USES_FULL_48BIT_PPGTT(dev)) {
+   vm-scratch_pdp = alloc_pdp(dev);
+   if (IS_ERR(vm-scratch_pdp)) {
+   free_pd(dev, vm-scratch_pd);
+   free_pt(dev, vm-scratch_pt);
+   free_scratch_page(dev, vm-scratch_page);
+   return PTR_ERR(vm-scratch_pdp);
+   }
+   }
+
gen8_initialize_pt(vm, vm-scratch_pt);
gen8_initialize_pd(vm, vm-scratch_pd);
+   if (USES_FULL_48BIT_PPGTT(dev))
+   gen8_initialize_pdp(vm, vm-scratch_pdp);

return 0;
  }
@@ -873,6 +906,8 @@ static void gen8_free_scratch(struct i915_address_space *vm)
  {
struct drm_device *dev = vm-dev;

+   if (USES_FULL_48BIT_PPGTT(dev))
+   free_pdp(dev, vm-scratch_pdp);
free_pd(dev, vm-scratch_pd);
free_pt(dev, vm-scratch_pt);
free_scratch_page(dev, vm-scratch_page);
@@ -1074,6 +1109,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct 
i915_address_space *vm,
if (IS_ERR(pdp))
goto unwind_out;

+   gen8_initialize_pdp(vm, pdp);
pml4-pdps[pml4e] = pdp;
__set_bit(pml4e, new_pdps);
trace_i915_page_directory_pointer_entry_alloc(vm,
@@ -1353,6 +1389,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
if (ret)
goto free_scratch;

+   gen8_initialize_pml4(ppgtt-base, ppgtt-pml4);
+
ppgtt-base.total = 1ULL  48;
ppgtt-switch_mm = gen8_48b_mm_switch;
} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 
b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 11d44b3..70c50e7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -278,6 +278,7 @@ struct i915_address_space {
struct i915_page_scratch *scratch_page;
struct i915_page_table *scratch_pt;
struct i915_page_directory *scratch_pd;
+   struct i915_page_directory_pointer *scratch_pdp; /* GEN8+  48b PPGTT */

/**
 * List of objects currently involved in rendering.


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 10/19] drm/i915/gen8: Add 4 level support in insert_entries and clear_range

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

When 48b is enabled, gen8_ppgtt_insert_entries needs to read the Page Map
Level 4 (PML4), before it selects which Page Directory Pointer (PDP)
it will write to.

Similarly, gen8_ppgtt_clear_range needs to get the correct PDP/PD range.

This patch was inspired by Ben's "Depend exclusively on map and
unmap_vma".

v2: Rebase after s/page_tables/page_table/.
v3: Remove unnecessary pdpe loop in gen8_ppgtt_clear_range_4lvl and use
clamp_pdp in gen8_ppgtt_insert_entries (Akash).
v4: Merge gen8_ppgtt_clear_range_4lvl into gen8_ppgtt_clear_range to
maintain symmetry with gen8_ppgtt_insert_entries (Akash).
v5: Do not mix pages and bytes in insert_entries (Akash).
v6: Prevent overflow in sg_nents << PAGE_SHIFT, when inserting 4GB at
once.
v7: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
Use gen8_px_index functions, and remove unnecessary number of pages
parameter in insert_pte_entries.
v8: Change gen8_ppgtt_clear_pte_range to stop at PDP boundary, instead of
adding an extra clamp function; remove unnecessary pdp_start/pdp_len
variables (Akash).
v9: pages->orig_nents instead of sg_nents(pages->sgl) to get the
length (Akash).

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 49 +++--
  1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7c024e98..7070d42 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -687,9 +687,9 @@ static void gen8_ppgtt_clear_pte_range(struct 
i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen8_pte_t *pt_vaddr;
-   unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
-   unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
-   unsigned pte = start  GEN8_PTE_SHIFT  GEN8_PTE_MASK;
+   unsigned pdpe = gen8_pdpe_index(start);
+   unsigned pde = gen8_pde_index(start);
+   unsigned pte = gen8_pte_index(start);
unsigned num_entries = length  PAGE_SHIFT;
unsigned last_pte, i;

@@ -725,7 +725,8 @@ static void gen8_ppgtt_clear_pte_range(struct 
i915_address_space *vm,

pte = 0;
if (++pde == I915_PDES) {
-   pdpe++;
+   if (++pdpe == I915_PDPES_PER_PDP(vm-dev))
+   break;
pde = 0;
}
}
@@ -738,12 +739,21 @@ static void gen8_ppgtt_clear_range(struct 
i915_address_space *vm,
  {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
-   struct i915_page_directory_pointer *pdp = ppgtt-pdp; /* FIXME: 48b */
-
gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm-scratch_page),
 I915_CACHE_LLC, use_scratch);

-   gen8_ppgtt_clear_pte_range(vm, pdp, start, length, scratch_pte);
+   if (!USES_FULL_48BIT_PPGTT(vm-dev)) {
+   gen8_ppgtt_clear_pte_range(vm, ppgtt-pdp, start, length,
+  scratch_pte);
+   } else {
+   uint64_t templ4, pml4e;
+   struct i915_page_directory_pointer *pdp;
+
+   gen8_for_each_pml4e(pdp, ppgtt-pml4, start, length, templ4, 
pml4e) {
+   gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
+  scratch_pte);
+   }
+   }
  }

  static void
@@ -756,9 +766,9 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen8_pte_t *pt_vaddr;
-   unsigned pdpe = start  GEN8_PDPE_SHIFT  GEN8_PDPE_MASK;
-   unsigned pde = start  GEN8_PDE_SHIFT  GEN8_PDE_MASK;
-   unsigned pte = start  GEN8_PTE_SHIFT  GEN8_PTE_MASK;
+   unsigned pdpe = gen8_pdpe_index(start);
+   unsigned pde = gen8_pde_index(start);
+   unsigned pte = gen8_pte_index(start);

pt_vaddr = NULL;

@@ -776,7 +786,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
kunmap_px(ppgtt, pt_vaddr);
pt_vaddr = NULL;
if (++pde == I915_PDES) {
-   pdpe++;
+   if (++pdpe == I915_PDPES_PER_PDP(vm-dev))
+   break;
pde = 0;
}
pte = 0;
@@ -795,11 +806,23 @@ static void gen8_ppgtt_insert_entries(struct 
i915_address_space *vm,
  {
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, 

Re: [Intel-gfx] [PATCH] drm/i915: Replace WARN inside I915_READ64_2x32 with retry loop

2015-07-29 Thread Daniel Vetter
On Tue, Jul 28, 2015 at 02:53:16PM +0100, Chris Wilson wrote:
 On Wed, Jul 15, 2015 at 09:50:42AM +0100, Chris Wilson wrote:
  Since we may conceivably encounter situations where the upper part of the
  64bit register changes between reads, for example when a timestamp
  counter overflows, change the WARN into a retry loop.
  
  Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
  Cc: Michał Winiarski michal.winiar...@intel.com
 
 Michał, as you correctly predicted this WARN will be hit in the wild
 after adjusting reg_read_ioctl(TIMESTAMP).
 
 Anyone care to review this patch?

Picked up for -fixes, thanks for the patch.
-Daniel

 
  ---
   drivers/gpu/drm/i915/i915_drv.h | 17 -
   1 file changed, 8 insertions(+), 9 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/i915_drv.h 
  b/drivers/gpu/drm/i915/i915_drv.h
  index ee53873ab252..cfddd69bd06c 100644
  --- a/drivers/gpu/drm/i915/i915_drv.h
  +++ b/drivers/gpu/drm/i915/i915_drv.h
  @@ -3484,15 +3484,14 @@ int intel_freq_opcode(struct drm_i915_private 
  *dev_priv, int val);
   #define I915_READ64(reg)   dev_priv-uncore.funcs.mmio_readq(dev_priv, 
  (reg), true)
   
   #define I915_READ64_2x32(lower_reg, upper_reg) ({  \
  -   u32 upper = I915_READ(upper_reg);   \
  -   u32 lower = I915_READ(lower_reg);   \
  -   u32 tmp = I915_READ(upper_reg); \
  -   if (upper != tmp) { \
  -   upper = tmp;\
  -   lower = I915_READ(lower_reg);   \
  -   WARN_ON(I915_READ(upper_reg) != upper); \
  -   }   \
  -   (u64)upper  32 | lower; })
  +   u32 upper, lower, tmp;  \
  +   tmp = I915_READ(upper_reg); \
  +   do {\
  +   upper = tmp;\
  +   lower = I915_READ(lower_reg);   \
  +   tmp = I915_READ(upper_reg); \
  +   } while (upper != tmp); \
  +   (u64)upper  32 | lower; })
   
   #define POSTING_READ(reg)  (void)I915_READ_NOTRACE(reg)
   #define POSTING_READ16(reg)(void)I915_READ16_NOTRACE(reg)
 
 -- 
 Chris Wilson, Intel Open Source Technology Centre
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/3] drm/i915: Support DDI lane reversal for DP

2015-07-29 Thread Sivakumar Thulasimani
why not detect reverse in intel_dp_detect/intel_hpd_pulse ? that way you 
can identify both lane count and reversal state without touching 
anything in the link training code. i am yet to upstream my changes for 
CHT that i can share if required that does the same in intel_dp_detect 
without touching any line in link training path.


On 7/28/2015 9:33 PM, Benjamin Tissoires wrote:

The DP outputs connected through a USB Type-C port can have inverted
lanes. To detect that case, we implement autodetection by training only
the first lane; if it doesn't work, we assume that we need to invert
the lanes.

Tested on a Chromebook Pixel 2015 (samus) with a USB Type-C to HDMI
adapter and a Dell 4K and some various regular monitors.

Based on 2 patches from the ChromeOS tree by:
Stéphane Marchesin marc...@chromium.org
Todd Broch tbr...@chromium.org

Signed-off-by: Benjamin Tissoires benjamin.tissoi...@redhat.com
---
  drivers/gpu/drm/i915/intel_ddi.c | 13 +
  drivers/gpu/drm/i915/intel_dp.c  | 36 
  drivers/gpu/drm/i915/intel_drv.h |  1 +
  3 files changed, 50 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 9a40bfb..0b0c1ec 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2249,6 +2249,7 @@ static void intel_ddi_pre_enable(struct intel_encoder 
*intel_encoder)
enum port port = intel_ddi_get_encoder_port(intel_encoder);
int type = intel_encoder-type;
int hdmi_level;
+   bool reversed = false;
  
  	if (type == INTEL_OUTPUT_EDP) {

struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
@@ -2295,8 +2296,20 @@ static void intel_ddi_pre_enable(struct intel_encoder 
*intel_encoder)
if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
  
+		if (IS_BROADWELL(dev)  type == INTEL_OUTPUT_DISPLAYPORT) {

+   intel_ddi_init_dp_buf_reg(intel_encoder);
+   reversed = intel_dp_is_reversed(intel_dp);
+   }
+
intel_ddi_init_dp_buf_reg(intel_encoder);
  
+		if (IS_BROADWELL(dev)) {

+   if (reversed)
+   intel_dp-DP |= DDI_BUF_PORT_REVERSAL;
+   else
+   intel_dp-DP = ~DDI_BUF_PORT_REVERSAL;
+   }
+
intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
intel_dp_start_link_train(intel_dp);
intel_dp_complete_link_train(intel_dp);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index b740987..18280cc 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -3820,6 +3820,42 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp)
intel_dp-DP = DP;
  }
  
+bool intel_dp_is_reversed(struct intel_dp *intel_dp)

+{
+   struct drm_encoder *encoder = dp_to_dig_port(intel_dp)-base.base;
+   struct drm_device *dev = encoder-dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   uint32_t DP = intel_dp-DP;
+
+   /*
+* Train with 1 lane. There is no guarantee that the monitor supports
+* 2 or 4 lanes, and we wouldn't see any asymetricity with 4 lanes.
+*/
+   const uint8_t lane_count = 1;
+   bool reversed;
+
+   if (!HAS_DDI(dev))
+   return false;
+
+   DP = ~(DDI_BUF_PORT_REVERSAL | DDI_PORT_WIDTH(4));
+   DP |= DDI_PORT_WIDTH(lane_count);
+
+   I915_WRITE(intel_dp-output_reg, DP);
+   POSTING_READ(intel_dp-output_reg);
+   udelay(600);
+
+   if (!_intel_dp_start_link_train(intel_dp, lane_count, DP, true))
+   return true;
+
+   reversed = !_intel_dp_complete_link_train(intel_dp, lane_count, DP, 
true);
+
+   /* clear training, we had only one lane */
+   intel_dp-train_set_valid = false;
+
+   return reversed;
+
+}
+
  void intel_dp_stop_link_train(struct intel_dp *intel_dp)
  {
intel_dp_set_link_train(intel_dp, intel_dp-DP,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 320c9e6..cba00c6 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1169,6 +1169,7 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc);
  bool intel_dp_compute_config(struct intel_encoder *encoder,
 struct intel_crtc_state *pipe_config);
  bool intel_dp_is_edp(struct drm_device *dev, enum port port);
+bool intel_dp_is_reversed(struct intel_dp *intel_dp);
  enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port,
  bool long_hpd);
  void intel_edp_backlight_on(struct intel_dp *intel_dp);


--
regards,
Sivakumar

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org

Re: [Intel-gfx] Error in inner loop in validate_cmds_sorted / out of bounds issue

2015-07-29 Thread Daniel Vetter
On Tue, Jul 28, 2015 at 08:36:18PM +0100, Chris Wilson wrote:
 On Tue, Jul 28, 2015 at 11:14:19AM -0700, Hanno Böck wrote:
  Hi,
  
  On Tue, 28 Jul 2015 10:14:51 +0200
  Daniel Vetter dan...@ffwll.ch wrote:
  
Indeed, nice catch. Could you please read
Documentation/SubmittingPatches and apply your Signed-off-by and
then we can accept this patch under your authorship.

Preferrably this is two patches, (a) fix the tables, (b) fix the
validator. That way we can delay enabling the validator if we need
to fix the tables for others.
  
  I think I have checked all tables, not just the ones used on my gpu,
  they should be fine. But I've split the patch.
  
   Also can you please add signed-off-by lines to your patch when
   resubmitting? See Documentation/SubmittingPatches for all the details.
  
  The patch already had a Signed-off-by line.
  
  The checkpatch script complains that it doesn't like the formatting of
  the CMD command. However I won't change that in this patch, as this is
  how the whole file is formatted. If this is wanted I can submit a patch
  changing the formatting afterwards, but I think this is an unrelated
  change.
  
  Please apply.
  
  -- 
  Hanno Böck
  http://hboeck.de/
  
  mail/jabber: ha...@hboeck.de
  GPG: BBB51E42
 
  Properly sort cmd tables.
 
 drm/i915: Properly sort MI command table
 
 In the future, we may want to speed up command/register searching using
 a bisection and so we require them to be in ascending order respectively
 by command value or register address. However, this was not true for one
 pair in the MI table; make it so.
  
  Signed-off-by: Hanno Boeck ha...@hboeck.de
 Reviewed-by: Chris Wilson ch...@chris-wilson.co.uk
 
  diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
  b/drivers/gpu/drm/i915/i915_cmd_parser.c
  index 306d9e4..5fdd8c8 100644
  --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
  +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
  @@ -151,8 +151,8 @@ static const struct drm_i915_cmd_descriptor 
  render_cmds[] = {
  CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
  CMD(  MI_PREDICATE, SMI,F,  1,  S  ),
  CMD(  MI_TOPOLOGY_FILTER,   SMI,F,  1,  S  ),
  -   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
  CMD(  MI_SET_APPID, SMI,F,  1,  S  ),
  +   CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
  CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   R  ),
  CMD(  MI_URB_CLEAR, SMI,   !F,  0xFF,   S  ),
  CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3F,   B,
 
  Fix loop checking cmd tables.
 
 drm/i915: Fix command parser table validator
 
 As we may like to use a bisection search on the tables in future, we
 need them to be ordered. For convenience we expect the compiled tables
 to be ordered and check this on initialisation. However, the validator used
 the wrong iterators, failed to spot the misordered MI tables and instead
 walked off into the unknown (as spotted by kasan).
 
  Signed-off-by: Hanno Boeck ha...@hboeck.de
 Reviewed-by: Chris Wilson ch...@chris-wilson.co.uk

Ok I hand-assembled the patches since it's your first, but please follow
Documentation/SubmittingPatches next time around, i.e. 1 mail per patch,
no attachments, diff included at the bottom. Otherwise the tools can't
just pick up patches on my side. git send-email will do all that
formatting for you correctly, so that's what I recommend to use.

Anyway patches applied to drm-intel-next-queued, thanks a lot for doing
them.
-Daniel

 
  diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
  b/drivers/gpu/drm/i915/i915_cmd_parser.c
  index 306d9e4..3a53bf3 100644
  --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
  +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
  @@ -564,7 +564,7 @@ static bool validate_cmds_sorted(struct intel_engine_cs 
  *ring,
   
  for (j = 0; j  table-count; j++) {
  const struct drm_i915_cmd_descriptor *desc =
  -   table-table[i];
  +   table-table[j];
  u32 curr = desc-cmd.value  desc-cmd.mask;
   
  if (curr  previous) {
 
 -- 
 Chris Wilson, Intel Open Source Technology Centre

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Chris Wilson
On Wed, Jul 29, 2015 at 09:30:21AM +0100, Derek Morton wrote:
 The swap-thrash subtest had a requirement that swap memory be
 present but no minimum amount was specified. The subtest allowed
 for half the total swap memory for overhead. Some android systems
 have only a small amount of swap space and half this was not
 enough, resulting in OOM errors. It was not possible to determine
 the exact amount of memory the test would require in all
 configurations to guarantee swap memory would be used but not
 trigger an OOM error.
 As a minimum recommended swap partition on Linux is 256Mb the
 subtest was updated to require this.
 
 Also fixed a couple of small memory leaks.
 
 Signed-off-by: Derek Morton derek.j.mor...@intel.com
 ---
  tests/gem_render_linear_blits.c | 10 +-
  1 file changed, 9 insertions(+), 1 deletion(-)
 
 diff --git a/tests/gem_render_linear_blits.c b/tests/gem_render_linear_blits.c
 index f83c6d4..5dd210d 100644
 --- a/tests/gem_render_linear_blits.c
 +++ b/tests/gem_render_linear_blits.c
 @@ -184,6 +184,9 @@ static void run_test (int fd, int count)
   }
   intel_batchbuffer_free(batch);
   drm_intel_bufmgr_destroy(bufmgr);
 +
 + free(bo);
 + free(start_val);
  }
  
  igt_main
 @@ -210,7 +213,12 @@ igt_main
  
   igt_subtest(swap-thrash) {
   uint64_t swap_mb = intel_get_total_swap_mb();
 - igt_require(swap_mb  0);
 + /* The calculation of count allows 1/2 the swap memory as
 +overhead. However on Android systems with a very small swap
 +partition this is not enough resulting in OOM errors.
 +As 256Mb is a minimum recomended size for a swap partition
 +on Linux, skip the subtest if less than this. */
 + igt_require(swap_mb  255);
   count = ((intel_get_avail_ram_mb() + (swap_mb / 2)) * 
 1024*1024) / SIZE;
   intel_require_memory(count, SIZE, CHECK_RAM | CHECK_SWAP);

Surely fixing intel_require_memory(CHECK_SWAP) (adding the slop of
256MiB swap or somesuch) would be better?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm: Fixup locking WARNINGs in drm_mode_config_reset

2015-07-29 Thread Daniel Vetter
On Wed, Jul 29, 2015 at 08:32:43AM +0200, Daniel Vetter wrote:
 With
 
 commit 7a3f3d6667f5f9ffd1517f6b21d64bbf5312042c
 Author: Daniel Vetter daniel.vet...@ffwll.ch
 Date:   Thu Jul 9 23:44:28 2015 +0200
 
 drm: Check locking in drm_for_each_connector
 
 we started checking the locking in drm_for_each_connector but somehow
 I totally missed drm_mode_config_reset. There's no problem there since
 this function should only be called in single-threaded contexts
 (driver load or resume), so just wrap the loop with the right lock.
 
 v2: Drink coffee and all that ...
 
 Cc: Laurent Pinchart laurent.pinch...@ideasonboard.com
 Reported-by: Laurent Pinchart laurent.pinch...@ideasonboard.com
 Signed-off-by: Daniel Vetter daniel.vet...@intel.com

Tested-by: Jianwei Wang jianwei.wang@gmail.com

 ---
  drivers/gpu/drm/drm_crtc.c | 2 ++
  1 file changed, 2 insertions(+)
 
 diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
 index c91c18b2b1d4..10c1a0f6680c 100644
 --- a/drivers/gpu/drm/drm_crtc.c
 +++ b/drivers/gpu/drm/drm_crtc.c
 @@ -5273,9 +5273,11 @@ void drm_mode_config_reset(struct drm_device *dev)
   if (encoder-funcs-reset)
   encoder-funcs-reset(encoder);
  
 + mutex_lock(dev-mode_config.mutex);
   drm_for_each_connector(connector, dev)
   if (connector-funcs-reset)
   connector-funcs-reset(connector);
 + mutex_unlock(dev-mode_config.mutex);
  }
  EXPORT_SYMBOL(drm_mode_config_reset);
  
 -- 
 2.1.4
 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 4/7] drm/i915: Split sink_crc function in start, stop and read.

2015-07-29 Thread Daniel Vetter
On Tue, Jul 28, 2015 at 01:10:38PM -0700, Rafael Antognolli wrote:
 On Thu, Jul 23, 2015 at 04:35:47PM -0700, Rodrigo Vivi wrote:
  No functional change. Just a preparation patch to make clear
  what operation we are performing.
  
  Signed-off-by: Rodrigo Vivi rodrigo.v...@intel.com
 
 Good. The place where you call hsw_disable_ips() changes, but as you
 explained earlier, this is required.

That explanation should be in the commit message. Merged the two earlier
patches in the series meanwhile.
-Daniel

 
 Reviewed-by: Rafael Antognolli rafael.antogno...@intel.com
 
  ---
   drivers/gpu/drm/i915/intel_dp.c | 89 
  +++--
   1 file changed, 50 insertions(+), 39 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/intel_dp.c 
  b/drivers/gpu/drm/i915/intel_dp.c
  index 44f8a32..10cbc98 100644
  --- a/drivers/gpu/drm/i915/intel_dp.c
  +++ b/drivers/gpu/drm/i915/intel_dp.c
  @@ -3958,40 +3958,64 @@ intel_dp_probe_mst(struct intel_dp *intel_dp)
  return intel_dp-is_mst;
   }
   
  -int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
  +static void intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
   {
  -   struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
  -   struct drm_device *dev = intel_dig_port-base.base.dev;
  -   struct intel_crtc *intel_crtc =
  -   to_intel_crtc(intel_dig_port-base.base.crtc);
  +   struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
  +   struct intel_crtc *intel_crtc = to_intel_crtc(dig_port-base.base.crtc);
  u8 buf;
  -   int test_crc_count;
  -   int attempts = 6;
  -   int ret = 0;
  -
  -   hsw_disable_ips(intel_crtc);
   
  -   if (drm_dp_dpcd_readb(intel_dp-aux, DP_TEST_SINK_MISC, buf)  0) {
  -   ret = -EIO;
  -   goto out;
  +   if (drm_dp_dpcd_readb(intel_dp-aux, DP_TEST_SINK, buf)  0) {
  +   DRM_DEBUG_KMS(Sink CRC couldn't be stopped properly\n);
  +   return;
  }
   
  -   if (!(buf  DP_TEST_CRC_SUPPORTED)) {
  -   ret = -ENOTTY;
  -   goto out;
  -   }
  +   if (drm_dp_dpcd_writeb(intel_dp-aux, DP_TEST_SINK,
  +  buf  ~DP_TEST_SINK_START)  0)
  +   DRM_DEBUG_KMS(Sink CRC couldn't be stopped properly\n);
   
  -   if (drm_dp_dpcd_readb(intel_dp-aux, DP_TEST_SINK, buf)  0) {
  -   ret = -EIO;
  -   goto out;
  -   }
  +   hsw_enable_ips(intel_crtc);
  +}
  +
  +static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
  +{
  +   struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
  +   struct intel_crtc *intel_crtc = to_intel_crtc(dig_port-base.base.crtc);
  +   u8 buf;
  +
  +   if (drm_dp_dpcd_readb(intel_dp-aux, DP_TEST_SINK_MISC, buf)  0)
  +   return -EIO;
  +
  +   if (!(buf  DP_TEST_CRC_SUPPORTED))
  +   return -ENOTTY;
  +
  +   if (drm_dp_dpcd_readb(intel_dp-aux, DP_TEST_SINK, buf)  0)
  +   return -EIO;
  +
  +   hsw_disable_ips(intel_crtc);
   
  if (drm_dp_dpcd_writeb(intel_dp-aux, DP_TEST_SINK,
  -   buf | DP_TEST_SINK_START)  0) {
  -   ret = -EIO;
  -   goto out;
  +  buf | DP_TEST_SINK_START)  0) {
  +   hsw_enable_ips(intel_crtc);
  +   return -EIO;
  }
   
  +   return 0;
  +}
  +
  +int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
  +{
  +   struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
  +   struct drm_device *dev = dig_port-base.base.dev;
  +   struct intel_crtc *intel_crtc = to_intel_crtc(dig_port-base.base.crtc);
  +   u8 buf;
  +   int test_crc_count;
  +   int attempts = 6;
  +   int ret;
  +
  +   ret = intel_dp_sink_crc_start(intel_dp);
  +   if (ret)
  +   return ret;
  +
  if (drm_dp_dpcd_readb(intel_dp-aux, DP_TEST_SINK_MISC, buf)  0) {
  ret = -EIO;
  goto stop;
  @@ -4014,23 +4038,10 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 
  *crc)
  goto stop;
  }
   
  -   if (drm_dp_dpcd_read(intel_dp-aux, DP_TEST_CRC_R_CR, crc, 6)  0) {
  +   if (drm_dp_dpcd_read(intel_dp-aux, DP_TEST_CRC_R_CR, crc, 6)  0)
  ret = -EIO;
  -   goto stop;
  -   }
  -
   stop:
  -   if (drm_dp_dpcd_readb(intel_dp-aux, DP_TEST_SINK, buf)  0) {
  -   DRM_DEBUG_KMS(Sink CRC couldn't be stopped properly\n);
  -   goto out;
  -   }
  -   if (drm_dp_dpcd_writeb(intel_dp-aux, DP_TEST_SINK,
  -  buf  ~DP_TEST_SINK_START)  0) {
  -   DRM_DEBUG_KMS(Sink CRC couldn't be stopped properly\n);
  -   goto out;
  -   }
  -out:
  -   hsw_enable_ips(intel_crtc);
  +   intel_dp_sink_crc_stop(intel_dp);
  return ret;
   }
   
  -- 
  2.1.0
  
  ___
  Intel-gfx mailing list
  Intel-gfx@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/intel-gfx
 ___
 Intel-gfx mailing list
 

[Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Derek Morton
The swap-thrash subtest had a requirement that swap memory be
present but no minimum amount was specified. The subtest allowed
for half the total swap memory for overhead. Some android systems
have only a small amount of swap space and half this was not
enough, resulting in OOM errors. It was not possible to determine
the exact amount of memory the test would require in all
configurations to guarantee swap memory would be used but not
trigger an OOM error.
As a minimum recommended swap partition on Linux is 256Mb the
subtest was updated to require this.

Also fixed a couple of small memory leaks.

Signed-off-by: Derek Morton derek.j.mor...@intel.com
---
 tests/gem_render_linear_blits.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/gem_render_linear_blits.c b/tests/gem_render_linear_blits.c
index f83c6d4..5dd210d 100644
--- a/tests/gem_render_linear_blits.c
+++ b/tests/gem_render_linear_blits.c
@@ -184,6 +184,9 @@ static void run_test (int fd, int count)
}
intel_batchbuffer_free(batch);
drm_intel_bufmgr_destroy(bufmgr);
+
+   free(bo);
+   free(start_val);
 }
 
 igt_main
@@ -210,7 +213,12 @@ igt_main
 
igt_subtest(swap-thrash) {
uint64_t swap_mb = intel_get_total_swap_mb();
-   igt_require(swap_mb  0);
+   /* The calculation of count allows 1/2 the swap memory as
+  overhead. However on Android systems with a very small swap
+  partition this is not enough resulting in OOM errors.
+  As 256Mb is a minimum recomended size for a swap partition
+  on Linux, skip the subtest if less than this. */
+   igt_require(swap_mb  255);
count = ((intel_get_avail_ram_mb() + (swap_mb / 2)) * 
1024*1024) / SIZE;
intel_require_memory(count, SIZE, CHECK_RAM | CHECK_SWAP);
run_test(fd, count);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm: Fixup locking WARNINGs in drm_mode_config_reset

2015-07-29 Thread Daniel Vetter
With

commit 7a3f3d6667f5f9ffd1517f6b21d64bbf5312042c
Author: Daniel Vetter daniel.vet...@ffwll.ch
Date:   Thu Jul 9 23:44:28 2015 +0200

drm: Check locking in drm_for_each_connector

we started checking the locking in drm_for_each_connector but somehow
I totally missed drm_mode_config_reset. There's no problem there since
this function should only be called in single-threaded contexts
(driver load or resume), so just wrap the loop with the right lock.

v2: Drink coffee and all that ...

Cc: Laurent Pinchart laurent.pinch...@ideasonboard.com
Reported-by: Laurent Pinchart laurent.pinch...@ideasonboard.com
Signed-off-by: Daniel Vetter daniel.vet...@intel.com
---
 drivers/gpu/drm/drm_crtc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index c91c18b2b1d4..10c1a0f6680c 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -5273,9 +5273,11 @@ void drm_mode_config_reset(struct drm_device *dev)
if (encoder-funcs-reset)
encoder-funcs-reset(encoder);
 
+   mutex_lock(dev-mode_config.mutex);
drm_for_each_connector(connector, dev)
if (connector-funcs-reset)
connector-funcs-reset(connector);
+   mutex_unlock(dev-mode_config.mutex);
 }
 EXPORT_SYMBOL(drm_mode_config_reset);
 
-- 
2.1.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm: Fixup locking WARNINGs in drm_mode_config_reset

2015-07-29 Thread Daniel Vetter
With

commit 7a3f3d6667f5f9ffd1517f6b21d64bbf5312042c
Author: Daniel Vetter daniel.vet...@ffwll.ch
Date:   Thu Jul 9 23:44:28 2015 +0200

drm: Check locking in drm_for_each_connector

we started checking the locking in drm_for_each_connector but somehow
I totally missed drm_mode_config_reset. There's no problem there since
this function should only be called in single-threaded contexts
(driver load or resume), so just wrap the loop with the right lock.

Cc: Laurent Pinchart laurent.pinch...@ideasonboard.com
Reported-by: Laurent Pinchart laurent.pinch...@ideasonboard.com
Signed-off-by: Daniel Vetter daniel.vet...@intel.com
---
 drivers/gpu/drm/drm_crtc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index c91c18b2b1d4..a8e5ca79aad1 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -5273,9 +5273,11 @@ void drm_mode_config_reset(struct drm_device *dev)
if (encoder-funcs-reset)
encoder-funcs-reset(encoder);
 
+   mutex_lock(dev-mode_config.mutex);
drm_for_each_connector(connector, dev)
if (connector-funcs-reset)
connector-funcs-reset(connector);
+   mutex_unlock(dev-mode_config.mutex);
 }
 EXPORT_SYMBOL(drm_mode_config_reset);
 
-- 
2.1.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Chris Wilson
On Wed, Jul 29, 2015 at 09:52:31AM +, Morton, Derek J wrote:
 
 
 
 -Original Message-
 From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] 
 Sent: Wednesday, July 29, 2015 9:54 AM
 To: Morton, Derek J
 Cc: intel-gfx@lists.freedesktop.org; Wood, Thomas
 Subject: Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: 
 Increase min swap required
 
 On Wed, Jul 29, 2015 at 09:30:21AM +0100, Derek Morton wrote:
  The swap-thrash subtest had a requirement that swap memory be present 
  but no minimum amount was specified. The subtest allowed for half the 
  total swap memory for overhead. Some android systems have a only a 
  small amount of swap space and half this was not enough resulting in 
  OOM errors. It was not possible to determine the exact amount of 
  memory the test would require in all configurations to guarentee swap 
  memory would be used but not trigger an OOM error.
  As a minimum reccomended swap partition on Linux is 256Mb the subtest 
  was updated to require this.
  
  Also fixed a couple of small memory leaks.
  
  Signed-off-by: Derek Morton derek.j.mor...@intel.com
  ---
   tests/gem_render_linear_blits.c | 10 +-
   1 file changed, 9 insertions(+), 1 deletion(-)
  
  diff --git a/tests/gem_render_linear_blits.c 
  b/tests/gem_render_linear_blits.c index f83c6d4..5dd210d 100644
  --- a/tests/gem_render_linear_blits.c
  +++ b/tests/gem_render_linear_blits.c
  @@ -184,6 +184,9 @@ static void run_test (int fd, int count)
 }
 intel_batchbuffer_free(batch);
 drm_intel_bufmgr_destroy(bufmgr);
  +
  +  free(bo);
  +  free(start_val);
   }
   
   igt_main
  @@ -210,7 +213,12 @@ igt_main
   
 igt_subtest(swap-thrash) {
 uint64_t swap_mb = intel_get_total_swap_mb();
  -  igt_require(swap_mb  0);
  +  /* The calculation of count allows 1/2 the swap memory as
  + overhead. However on Android systems with a very small swap
  + partition this is not enough resulting in OOM errors.
  + As 256Mb is a minimum recomended size for a swap partition
  + on Linux, skip the subtest if less than this. */
  +  igt_require(swap_mb  255);
 count = ((intel_get_avail_ram_mb() + (swap_mb / 2)) * 
  1024*1024) / SIZE;
 intel_require_memory(count, SIZE, CHECK_RAM | CHECK_SWAP);
 
 Surely fixing intel_require_memory(CHECK_SWAP) (adding the slop of 256MiB 
 swap or somesuch) would be better?
 
 I don't think so. igt_require(swap_mb  255) is simple and easy to 
 understand. intel_require_memory() is doing a crude count+SIZE  ram+swap. It 
 does not take into account any other memory the test might be using or allow 
 any overhead for additional memory used elsewhere in the system. I tried 
 several scenarios with the HW I have. 2Gb RAM and no swap (removed the +swap 
 code) required count = ... / SIZE + 50Kb to run without any OOM errors (40Kb 
 was intermittent). On a 1Gb system with 99Mb of Swap it would quickly OOM 
 even though it should have an extra 50Mb (swap / 2) of free memory on top of 
 what the +50Kb was giving. I do not see any way of accurately calculating 
 count to be a value that guarantees swap memory will be used without going 
 OOM when the amount of swap is small and the amount of overhead RAM used is 
 not known accurately. Hence just skipping the subtest in that situation.
 In fact the intel_require_memory() as it stands is pointless and could even 
 be removed as count is calculated to be a value where intel_require_memory() 
 will never trigger an assert.

So basically, you have a kernel bug you wish to ignore?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 17/18] drm/i915/skl: Removed csr firmware load in resume path.

2015-07-29 Thread Sunil Kamath

On Tuesday 28 July 2015 04:53 PM, Sunil Kamath wrote:

On Sunday 26 July 2015 12:30 AM, Animesh Manna wrote:

As csr firmware is taking care of loading the firmware,
so no need for driver to load again.

Cc: Damien Lespiau damien.lesp...@intel.com
Cc: Imre Deak imre.d...@intel.com
Cc: Sunil Kamath sunil.kam...@intel.com
Signed-off-by: Animesh Manna animesh.ma...@intel.com
Signed-off-by: Vathsala Nagaraju vathsala.nagar...@intel.com
---
  drivers/gpu/drm/i915/i915_drv.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c 
b/drivers/gpu/drm/i915/i915_drv.c

index 77b35fd..f5e720b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1048,7 +1048,6 @@ static int skl_resume_prepare(struct 
drm_i915_private *dev_priv)

  skl_disable_dc6(dev_priv);
skl_init_cdclk(dev_priv);
-intel_csr_load_program(dev_priv);


The context save and restore program is reset on cold boot, warm 
reset, PCI function level reset, and hibernate/suspend.


Will it not impact?

- Sunil


If the concern is about checking the DC5/DC6 counters, wouldn't it be a
good idea to add that as a debug print before the f/w reload, rather than
avoiding the reload of the entire firmware completely?

- Sunil

return 0;
  }




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 06/12] drm/i915: Make crtc checking use the atomic state.

2015-07-29 Thread Ander Conselvan De Oliveira
On Mon, 2015-07-27 at 14:35 +0200, Maarten Lankhorst wrote:
 Instead of allocating pipe_config on the stack use the old crtc_state,
 it's only going to be freed from this point on.
 
 All crtc's encoders are now only checked once during modeset.
 
 Signed-off-by: Maarten Lankhorst maarten.lankho...@linux.intel.com
 ---
  drivers/gpu/drm/i915/intel_display.c | 118 
 +--
  1 file changed, 56 insertions(+), 62 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/intel_display.c 
 b/drivers/gpu/drm/i915/intel_display.c
 index fbb257d4728c..e3afe611a78c 100644
 --- a/drivers/gpu/drm/i915/intel_display.c
 +++ b/drivers/gpu/drm/i915/intel_display.c
 @@ -11829,7 +11829,7 @@ static int intel_crtc_atomic_check(struct drm_crtc 
 *crtc,
   struct intel_crtc_state *pipe_config =
   to_intel_crtc_state(crtc_state);
   struct drm_atomic_state *state = crtc_state-state;
 - int ret, idx = crtc-base.id;
 + int ret;
   bool mode_changed = needs_modeset(crtc_state);
  
   if (mode_changed  !check_encoder_cloning(state, intel_crtc)) {
 @@ -11837,10 +11837,6 @@ static int intel_crtc_atomic_check(struct drm_crtc 
 *crtc,
   return -EINVAL;
   }
  
 - I915_STATE_WARN(crtc-state-active != intel_crtc-active,
 - [CRTC:%i] mismatch between state-active(%i) and 
 crtc-active(%i)\n,
 - idx, crtc-state-active, intel_crtc-active);
 -
   if (mode_changed  !crtc_state-active)
   intel_crtc-atomic.update_wm_post = true;
  
 @@ -12721,19 +12717,16 @@ check_encoder_state(struct drm_device *dev)
  
   for_each_intel_encoder(dev, encoder) {
   bool enabled = false;
 - bool active = false;
 - enum pipe pipe, tracked_pipe;
 + enum pipe pipe;
  
   DRM_DEBUG_KMS([ENCODER:%d:%s]\n,
 encoder-base.base.id,
 encoder-base.name);
  
   for_each_intel_connector(dev, connector) {
 - if (connector-base.encoder != encoder-base)
 + if (connector-base.state-best_encoder != 
 encoder-base)
   continue;
   enabled = true;
 - if (connector-base.dpms != DRM_MODE_DPMS_OFF)
 - active = true;
  
   I915_STATE_WARN(connector-base.state-crtc !=
   encoder-base.crtc,
 @@ -12744,85 +12737,86 @@ check_encoder_state(struct drm_device *dev)
encoder's enabled state mismatch 
(expected %i, found %i)\n,
!!encoder-base.crtc, enabled);
 - I915_STATE_WARN(active  !encoder-base.crtc,
 -  active encoder with no crtc\n);
 -
 - active = encoder-get_hw_state(encoder, pipe);
  
   if (!encoder-base.crtc) {
 - I915_STATE_WARN(active,
 -  encoder detached but not turned off.\n);
 + bool active;
  
 - continue;
 + active = encoder-get_hw_state(encoder, pipe);
 + I915_STATE_WARN(active,
 +  encoder detached but still enabled on pipe %c.\n,
 +  pipe_name(pipe));
   }
 -
 - I915_STATE_WARN(active != encoder-base.crtc-state-active,
 -  encoder's hw state doesn't match sw tracking 
 -  (expected %i, found %i)\n,
 -  encoder-base.crtc-state-active, active);
 -
 -
 - tracked_pipe = to_intel_crtc(encoder-base.crtc)-pipe;
 - I915_STATE_WARN(active  pipe != tracked_pipe,
 -  active encoder's pipe doesn't match
 -  (expected %i, found %i)\n,
 -  tracked_pipe, pipe);
 -
   }
  }
  
  static void
 -check_crtc_state(struct drm_device *dev)
 +check_crtc_state(struct drm_device *dev, struct drm_atomic_state *state)
  {
   struct drm_i915_private *dev_priv = dev-dev_private;
 - struct intel_crtc *crtc;
   struct intel_encoder *encoder;
 - struct intel_crtc_state pipe_config;
 + struct drm_crtc_state *crtc_state;
 + struct drm_crtc *crtc;
 + int i;
  
 - for_each_intel_crtc(dev, crtc) {
 + for_each_crtc_in_state(state, crtc, crtc_state, i) {

So now we only check the state of crtcs affected by the modeset. In the
unlikely case of a bug that changes the hw state of another crtc that
should have been unchanged, the old code might catch the issue but the
new one doesn't. Isn't it better to just check everything?


Ander

 + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 + struct intel_crtc_state *pipe_config, *sw_config;
   bool active;
  
 - memset(pipe_config, 0, sizeof(pipe_config));
 + if (!needs_modeset(crtc-state))
 + 

[Intel-gfx] [PATCH 2/3] drm/rcar: Only update planes on active crtc

2015-07-29 Thread Daniel Vetter
R-car does runtime pm (that's why it's committing plane state only at
the end). Therefore better to only update planes on active crtc. Note
that since the helpers always add all enabled planes when doing a
modeset change on a crtc we are guaranteed to update plane hw state to
the latest requested state each time the crtc is enabled.

Cc: Laurent Pinchart laurent.pinchart+rene...@ideasonboard.com
Signed-off-by: Daniel Vetter daniel.vet...@intel.com
---
 drivers/gpu/drm/rcar-du/rcar_du_kms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c 
b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index ca12e8ca5552..20813582fbf1 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -456,7 +456,7 @@ static void rcar_du_atomic_complete(struct rcar_du_commit 
*commit)
/* Apply the atomic update. */
drm_atomic_helper_commit_modeset_disables(dev, old_state);
drm_atomic_helper_commit_modeset_enables(dev, old_state);
-   drm_atomic_helper_commit_planes(dev, old_state, false);
+   drm_atomic_helper_commit_planes(dev, old_state, true);
 
drm_atomic_helper_wait_for_vblanks(dev, old_state);
 
-- 
2.5.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/atomic: Paper over locking WARN in default_state_clear

2015-07-29 Thread Daniel Vetter
In

commit 6f75cea66c8dd043ced282016b21a639af176642
Author: Daniel Vetter daniel.vet...@ffwll.ch
Date:   Wed Nov 19 18:38:07 2014 +0100

drm/atomic: Only destroy connector states with connection mutex held

I tried to fix races of atomic commits against connector
hot-unplugging. The idea is to ensure lifetimes by holding the
connection_mutex long enough. That works for synchronous commits, but
not for async ones.

For async atomic commit we really need to fix up connector lifetimes
for real. But that's a much bigger task, so just add more duct-tape:
For cleaning up connector states we currently don't need the connector
itself. So NULL it out and remove the locking check. Of course that
check was to protect the entire sequence, but the modeset itself
should be safe since currently DP MST hot-removal does a dpms-off. And
that should synchronize with any outstanding async atomic commit.

Or at least that's my hope, this is all a giant mess.

Reported-by: Maarten Lankhorst maarten.lankho...@linux.intel.com
Cc: Maarten Lankhorst maarten.lankho...@linux.intel.com
Signed-off-by: Daniel Vetter daniel.vet...@intel.com
---
 drivers/gpu/drm/drm_atomic.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 3efd91c0c6cb..434915448ea0 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -153,9 +153,15 @@ void drm_atomic_state_default_clear(struct 
drm_atomic_state *state)
if (!connector)
continue;
 
-   WARN_ON(!drm_modeset_is_locked(config-connection_mutex));
-
-   connector-funcs-atomic_destroy_state(connector,
+   /*
+* FIXME: Async commits can race with connector unplugging and
+* there's currently nothing that prevents cleanup up state for
+* deleted connectors. As long as the callback doesn't look at
+* the connector we'll be fine though, so make sure that's the
+* case by setting all connector pointers to NULL.
+*/
+   state-connector_states[i]-connector = NULL;
+   connector-funcs-atomic_destroy_state(NULL,
   
state-connector_states[i]);
state-connectors[i] = NULL;
state-connector_states[i] = NULL;
-- 
2.5.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 17/18] drm/i915/skl: Removed csr firmware load in resume path.

2015-07-29 Thread Sunil Kamath

On Tuesday 28 July 2015 04:53 PM, Sunil Kamath wrote:

On Sunday 26 July 2015 12:30 AM, Animesh Manna wrote:

As csr firmware is taking care of loading the firmware,
so no need for driver to load again.

Cc: Damien Lespiau damien.lesp...@intel.com
Cc: Imre Deak imre.d...@intel.com
Cc: Sunil Kamath sunil.kam...@intel.com
Signed-off-by: Animesh Manna animesh.ma...@intel.com
Signed-off-by: Vathsala Nagaraju vathsala.nagar...@intel.com
---
  drivers/gpu/drm/i915/i915_drv.c | 1 -
  1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c 
b/drivers/gpu/drm/i915/i915_drv.c

index 77b35fd..f5e720b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1048,7 +1048,6 @@ static int skl_resume_prepare(struct 
drm_i915_private *dev_priv)

  skl_disable_dc6(dev_priv);
skl_init_cdclk(dev_priv);
-intel_csr_load_program(dev_priv);


The context save and restore program is reset on cold boot, warm 
reset, PCI function level reset, and hibernate/suspend.


Will it not impact?

- Sunil


If the intention is just to check the DC5/DC6 counters, why can't we just
add debug prints before the f/w reload, rather than avoiding reloading the
f/w itself?


- Sunil

return 0;
  }




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v5 17/19] drm/i915: Wa32bitGeneralStateOffset Wa32bitInstructionBaseOffset

2015-07-29 Thread Chris Wilson
On Wed, Jul 29, 2015 at 12:05:55PM +0100, Michel Thierry wrote:
 @@ -680,8 +680,8 @@ eb_vma_misplaced(struct i915_vma *vma)
  if (entry-flags  __EXEC_OBJECT_NEEDS_MAP  
  !obj-map_and_fenceable)
  return !only_mappable_for_reloc(entry-flags);
 
 -   if (!(entry-flags  EXEC_OBJECT_SUPPORTS_48B_ADDRESS) 
 -   (vma-node.start + vma-node.size) = (1ULL  32))
 +   if ((entry-flags  EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 
 +   (vma-node.start + vma-node.size - 1)  32)
  return true;
 
  return false;
 
 Also updated this part to follow your suggestion.
 
 I also spent a bit more time trying to figure out what gcc was doing
 here. But I can't reproduce it locally, I get sizeof(1ULL << 32) = 8
 and 1ULL << 32 doesn't seem to be zero (in 32 and 64 bit kernels).
 
 Could it be related to the gcc version? I'm using 4.8.4.

It looked valid to me as well, but a printk confirmed that gcc was
hitting that path for every object.

gcc (Ubuntu 4.9.2-10ubuntu13) 4.9.2
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/16] drm/i915: Attach color properties to CRTC

2015-07-29 Thread Sharma, Shashank
Hi Jim,
Thanks for the review.
My comments inline. 

Regards
Shashank
-Original Message-
From: Intel-gfx [mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Bish, Jim
Sent: Tuesday, July 21, 2015 3:44 AM
To: intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH 03/16] drm/i915: Attach color properties to CRTC



On 07/15/2015 06:09 AM, Kausal Malladi wrote:
 This patch does the following:
 1. Adds new files intel_color_manager(.c/.h) 2. Attaches color 
 properties to CRTC while initialization
 
 Signed-off-by: Shashank Sharma shashank.sha...@intel.com
 Signed-off-by: Kausal Malladi kausal.mall...@intel.com
 ---
  drivers/gpu/drm/i915/Makefile  |  3 +-
  drivers/gpu/drm/i915/intel_color_manager.c | 49 
 ++  drivers/gpu/drm/i915/intel_color_manager.h | 
 29 ++
  drivers/gpu/drm/i915/intel_display.c   |  2 ++
  drivers/gpu/drm/i915/intel_drv.h   |  4 +++
  5 files changed, 86 insertions(+), 1 deletion(-)  create mode 100644 
 drivers/gpu/drm/i915/intel_color_manager.c
  create mode 100644 drivers/gpu/drm/i915/intel_color_manager.h
 
 diff --git a/drivers/gpu/drm/i915/Makefile 
 b/drivers/gpu/drm/i915/Makefile index de21965..ad928d8 100644
 --- a/drivers/gpu/drm/i915/Makefile
 +++ b/drivers/gpu/drm/i915/Makefile
 @@ -56,7 +56,8 @@ i915-y += intel_audio.o \
 intel_overlay.o \
 intel_psr.o \
 intel_sideband.o \
 -   intel_sprite.o
 +   intel_sprite.o \
 +   intel_color_manager.o
  i915-$(CONFIG_ACPI)  += intel_acpi.o intel_opregion.o
  i915-$(CONFIG_DRM_I915_FBDEV)+= intel_fbdev.o
  
 diff --git a/drivers/gpu/drm/i915/intel_color_manager.c 
 b/drivers/gpu/drm/i915/intel_color_manager.c
 new file mode 100644
 index 000..baa4536
 --- /dev/null
 +++ b/drivers/gpu/drm/i915/intel_color_manager.c
 @@ -0,0 +1,49 @@
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person 
 +obtaining a
 + * copy of this software and associated documentation files (the 
 +Software),
 + * to deal in the Software without restriction, including without 
 +limitation
 + * the rights to use, copy, modify, merge, publish, distribute, 
 +sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom 
 +the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including 
 +the next
 + * paragraph) shall be included in all copies or substantial portions 
 +of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, 
 +EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
 +MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 
 +SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 
 +OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
 +ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
 +OTHER DEALINGS
 + * IN THE SOFTWARE.
 + *
 + * Authors:
 + * Shashank Sharma shashank.sha...@intel.com
 + * Kausal Malladi kausal.mall...@intel.com  */
 +
 +#include intel_color_manager.h
 +
 +void intel_attach_color_properties_to_crtc(struct drm_device *dev,
 + struct drm_mode_object *mode_obj)
 +{
 + struct drm_mode_config *config = dev-mode_config;
 +
 + if (IS_CHERRYVIEW(dev)) {
 + if (config-prop_color_capabilities)
 + drm_object_attach_property(mode_obj,
 + config-prop_color_capabilities, 0);
 + if (config-prop_palette_before_ctm)
 + drm_object_attach_property(mode_obj,
 + config-prop_palette_before_ctm, 0);
 + if (config-prop_palette_after_ctm)
 + drm_object_attach_property(mode_obj,
 + config-prop_palette_after_ctm, 0);
 + if (config-prop_ctm)
 + drm_object_attach_property(mode_obj,
 + config-prop_ctm, 0);
 + }
why only CHT?  Seems we should be putting cases for all of our devices.
Yes, this should be for all devices. We will change this.
 +}
 diff --git a/drivers/gpu/drm/i915/intel_color_manager.h 
 b/drivers/gpu/drm/i915/intel_color_manager.h
 new file mode 100644
 index 000..04c921d
 --- /dev/null
 +++ b/drivers/gpu/drm/i915/intel_color_manager.h
 @@ -0,0 +1,29 @@
 +/*
 + * Copyright © 2015 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person 
 +obtaining a
 + * copy of this software and associated documentation files (the 
 +Software),
 + * to deal in the Software without restriction, including without 
 +limitation
 + * the rights to use, copy, modify, merge, publish, distribute, 
 +sublicense,
 + * and/or sell copies of the Software, and to permit 

Re: [Intel-gfx] [PATCH 3/4] drm/i915: Add support for stealing purgable stolen pages

2015-07-29 Thread Chris Wilson
On Mon, Jul 27, 2015 at 11:38:13AM +0200, Daniel Vetter wrote:
 Chris and I just discussed on irc that the bound_list isn't in a great LRU
 order right now and Chris sent out a fix for that. But it only works if we
 preferentially shrink inactive objects first. Worth the bother or just a
 FIXME? For the fb use-case alone it's not needed since we can't remove the
 fb until it's no longer being displayed (otherwise the backwards-compat
 code kicks in and synchronously kills the display at RMFB time), and that
 pretty much means we can't put the underlying bo into any cache (and mark
 it purgeable) either. But a FIXME comment here would be good for sure,
 just in case this assumption ever gets broken.

I've been mucking around with the patch a bit (with contexts-from-stolen
reenabled) and the list iterators used here are terrible; severely
impacting our allocations by a few orders of magnitude (imagine having
just the ggtt full of 4k objects, let alone several ppgtt all full of
their own bound 4k objects).

To combat this will require a special purgeable list maintained by
madv(), and subclassing the struct drm_mm_node to hold our extra
details.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 06/12] drm/i915: Make crtc checking use the atomic state.

2015-07-29 Thread Daniel Vetter
On Wed, Jul 29, 2015 at 02:49:45PM +0300, Ander Conselvan De Oliveira wrote:
 On Mon, 2015-07-27 at 14:35 +0200, Maarten Lankhorst wrote:
  Instead of allocating pipe_config on the stack use the old crtc_state,
  it's only going to freed from this point on.
  
  All crtc's encoders are now only checked once during modeset.
  
  Signed-off-by: Maarten Lankhorst maarten.lankho...@linux.intel.com
  ---
   drivers/gpu/drm/i915/intel_display.c | 118 
  +--
   1 file changed, 56 insertions(+), 62 deletions(-)
  
  diff --git a/drivers/gpu/drm/i915/intel_display.c 
  b/drivers/gpu/drm/i915/intel_display.c
  index fbb257d4728c..e3afe611a78c 100644
  --- a/drivers/gpu/drm/i915/intel_display.c
  +++ b/drivers/gpu/drm/i915/intel_display.c
  @@ -11829,7 +11829,7 @@ static int intel_crtc_atomic_check(struct drm_crtc 
  *crtc,
  struct intel_crtc_state *pipe_config =
  to_intel_crtc_state(crtc_state);
  struct drm_atomic_state *state = crtc_state-state;
  -   int ret, idx = crtc-base.id;
  +   int ret;
  bool mode_changed = needs_modeset(crtc_state);
   
  if (mode_changed  !check_encoder_cloning(state, intel_crtc)) {
  @@ -11837,10 +11837,6 @@ static int intel_crtc_atomic_check(struct drm_crtc 
  *crtc,
  return -EINVAL;
  }
   
  -   I915_STATE_WARN(crtc-state-active != intel_crtc-active,
  -   [CRTC:%i] mismatch between state-active(%i) and 
  crtc-active(%i)\n,
  -   idx, crtc-state-active, intel_crtc-active);
  -
  if (mode_changed  !crtc_state-active)
  intel_crtc-atomic.update_wm_post = true;
   
  @@ -12721,19 +12717,16 @@ check_encoder_state(struct drm_device *dev)
   
  for_each_intel_encoder(dev, encoder) {
  bool enabled = false;
  -   bool active = false;
  -   enum pipe pipe, tracked_pipe;
  +   enum pipe pipe;
   
  DRM_DEBUG_KMS([ENCODER:%d:%s]\n,
encoder-base.base.id,
encoder-base.name);
   
  for_each_intel_connector(dev, connector) {
  -   if (connector-base.encoder != encoder-base)
  +   if (connector-base.state-best_encoder != 
  encoder-base)
  continue;
  enabled = true;
  -   if (connector-base.dpms != DRM_MODE_DPMS_OFF)
  -   active = true;
   
  I915_STATE_WARN(connector-base.state-crtc !=
  encoder-base.crtc,
  @@ -12744,85 +12737,86 @@ check_encoder_state(struct drm_device *dev)
   encoder's enabled state mismatch 
   (expected %i, found %i)\n,
   !!encoder-base.crtc, enabled);
  -   I915_STATE_WARN(active  !encoder-base.crtc,
  -active encoder with no crtc\n);
  -
  -   active = encoder-get_hw_state(encoder, pipe);
   
  if (!encoder-base.crtc) {
  -   I915_STATE_WARN(active,
  -encoder detached but not turned off.\n);
  +   bool active;
   
  -   continue;
  +   active = encoder-get_hw_state(encoder, pipe);
  +   I915_STATE_WARN(active,
  +encoder detached but still enabled on pipe %c.\n,
  +pipe_name(pipe));
  }
  -
  -   I915_STATE_WARN(active != encoder-base.crtc-state-active,
  -encoder's hw state doesn't match sw tracking 
  -(expected %i, found %i)\n,
  -encoder-base.crtc-state-active, active);
  -
  -
  -   tracked_pipe = to_intel_crtc(encoder-base.crtc)-pipe;
  -   I915_STATE_WARN(active  pipe != tracked_pipe,
  -active encoder's pipe doesn't match
  -(expected %i, found %i)\n,
  -tracked_pipe, pipe);
  -
  }
   }
   
   static void
  -check_crtc_state(struct drm_device *dev)
  +check_crtc_state(struct drm_device *dev, struct drm_atomic_state *state)
   {
  struct drm_i915_private *dev_priv = dev-dev_private;
  -   struct intel_crtc *crtc;
  struct intel_encoder *encoder;
  -   struct intel_crtc_state pipe_config;
  +   struct drm_crtc_state *crtc_state;
  +   struct drm_crtc *crtc;
  +   int i;
   
  -   for_each_intel_crtc(dev, crtc) {
  +   for_each_crtc_in_state(state, crtc, crtc_state, i) {
 
 So now we only check the state of crtcs affected by the modeset. In the
 unlikely case of a bug that changes the hw state of another crtc that
 should have been unchanged, the old code might catch the issue but the
 new one doesn't. Isn't it better to just check everything?

We can't since that other crtc might be doing some other async commit. But
eventually we should be doing a modeset on that one too and spot that
something went wrong.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation

Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Morton, Derek J


-Original Message-
From: Chris Wilson [mailto:ch...@chris-wilson.co.uk] 
Sent: Wednesday, July 29, 2015 9:54 AM
To: Morton, Derek J
Cc: intel-gfx@lists.freedesktop.org; Wood, Thomas
Subject: Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase 
min swap required

On Wed, Jul 29, 2015 at 09:30:21AM +0100, Derek Morton wrote:
 The swap-thrash subtest had a requirement that swap memory be present 
 but no minimum amount was specified. The subtest allowed for half the 
 total swap memory for overhead. Some android systems have a only a 
 small amount of swap space and half this was not enough resulting in 
 OOM errors. It was not possible to determine the exact amount of 
 memory the test would require in all configurations to guarentee swap 
 memory would be used but not trigger an OOM error.
 As a minimum reccomended swap partition on Linux is 256Mb the subtest 
 was updated to require this.
 
 Also fixed a couple of small memory leaks.
 
 Signed-off-by: Derek Morton derek.j.mor...@intel.com
 ---
  tests/gem_render_linear_blits.c | 10 +-
  1 file changed, 9 insertions(+), 1 deletion(-)
 
 diff --git a/tests/gem_render_linear_blits.c 
 b/tests/gem_render_linear_blits.c index f83c6d4..5dd210d 100644
 --- a/tests/gem_render_linear_blits.c
 +++ b/tests/gem_render_linear_blits.c
 @@ -184,6 +184,9 @@ static void run_test (int fd, int count)
  }
  intel_batchbuffer_free(batch);
  drm_intel_bufmgr_destroy(bufmgr);
 +
 +free(bo);
 +free(start_val);
  }
  
  igt_main
 @@ -210,7 +213,12 @@ igt_main
  
  igt_subtest(swap-thrash) {
  uint64_t swap_mb = intel_get_total_swap_mb();
 -igt_require(swap_mb  0);
 +/* The calculation of count allows 1/2 the swap memory as
 +   overhead. However on Android systems with a very small swap
 +   partition this is not enough resulting in OOM errors.
 +   As 256Mb is a minimum recomended size for a swap partition
 +   on Linux, skip the subtest if less than this. */
 +igt_require(swap_mb  255);
  count = ((intel_get_avail_ram_mb() + (swap_mb / 2)) * 
 1024*1024) / SIZE;
  intel_require_memory(count, SIZE, CHECK_RAM | CHECK_SWAP);

Surely fixing intel_require_memory(CHECK_SWAP) (adding the slop of 256MiB swap 
or somesuch) would be better?

I don't think so. igt_require(swap_mb > 255) is simple and easy to understand. 
intel_require_memory() is doing a crude count+SIZE  ram+swap. It does not take 
into account any other memory the test might be using or allow any overhead for 
additional memory used elsewhere in the system. I tried several scenarios with 
the HW I have. 2Gb RAM and no swap (removed the +swap code) required count = 
... / SIZE + 50Kb to run without any OOM errors (40Kb was intermittent). On a 
1Gb system with 99Mb of Swap it would quickly OOM even though it should have an 
extra 50Mb (swap / 2) of free memory on top of what the +50Kb was giving. I do 
not see any way of accurately calculating count to be a value that guarantees 
swap memory will be used without going OOM when the amount of swap is small and 
the amount of overhead RAM used is not known accurately. Hence just skipping 
the subtest in that situation.
In fact the intel_require_memory() as it stands is pointless and could even be 
removed as count is calculated to be a value where intel_require_memory() will 
never trigger an assert.

//Derek

-Chris

--
Chris Wilson, Intel Open Source Technology Centre

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v5 17/19] drm/i915: Wa32bitGeneralStateOffset Wa32bitInstructionBaseOffset

2015-07-29 Thread Michel Thierry

On 7/28/2015 3:43 PM, Chris Wilson wrote:

On Tue, Jul 28, 2015 at 12:12:11PM +0100, Michel Thierry wrote:

On 7/27/2015 10:11 PM, Chris Wilson wrote:

On Thu, Jul 16, 2015 at 10:33:29AM +0100, Michel Thierry wrote:

+   if (!(entry-flags  EXEC_OBJECT_SUPPORTS_48B_ADDRESS) 
+   (vma-node.start + vma-node.size) = (1ULL  32))
+   return true;


gcc completely screwed this up here and used 0 for 1ULL << 32.

Note that we can allow state + size == 4G (since the end is exclusive),
so I went with

   if ((entry-flags  EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 
   (vma-node.start + vma-node.size - 1)  32)
  return true;

instead.
-Chris



Thanks, I'll include this change in the next patch version.


I've also got a couple of other stylistic changes, plus an earlier
request:
...

I updated my patch with these changes, thanks.


diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 209e8e2b07be..78fc8810d6e0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -680,8 +680,8 @@ eb_vma_misplaced(struct i915_vma *vma)
 if (entry-flags  __EXEC_OBJECT_NEEDS_MAP  !obj-map_and_fenceable)
 return !only_mappable_for_reloc(entry-flags);

-   if (!(entry-flags  EXEC_OBJECT_SUPPORTS_48B_ADDRESS) 
-   (vma-node.start + vma-node.size) = (1ULL  32))
+   if ((entry-flags  EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 
+   (vma-node.start + vma-node.size - 1)  32)
 return true;

 return false;


Also updated this part to follow your suggestion.

I also spent a bit more time trying to figure out what gcc was doing here.
But I can't reproduce it locally; I get sizeof(1ULL << 32) = 8 and
1ULL << 32 doesn't seem to be zero (in 32 and 64 bit kernels).


Could it be related to the gcc version? I'm using 4.8.4.

-Michel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 04/16] drm: Add structure for querying palette color capabilities

2015-07-29 Thread Sharma, Shashank
Regards
Shashank

-Original Message-
From: Intel-gfx [mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Bish, Jim
Sent: Tuesday, July 21, 2015 3:48 AM
To: intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH 04/16] drm: Add structure for querying palette 
color capabilities



On 07/15/2015 06:09 AM, Kausal Malladi wrote:
 The DRM color management framework is targeting various hardware 
 platforms and drivers. Different platforms can have different color 
 correction and enhancement capabilities.
 
 A common user space application can query these capabilities using the 
 DRM property interface. Each driver can fill this property with its 
 platform's palette color capabilities.
 
 This patch adds new structure in DRM layer for querying palette color 
 capabilities. This structure will be used by all user space agents to 
 configure appropriate color configurations.
 
 Signed-off-by: Shashank Sharma shashank.sha...@intel.com
 Signed-off-by: Kausal Malladi kausal.mall...@intel.com
 ---
  include/uapi/drm/drm.h | 11 +++
  1 file changed, 11 insertions(+)
 
 diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 
 3801584..e3c642f 100644
 --- a/include/uapi/drm/drm.h
 +++ b/include/uapi/drm/drm.h
 @@ -829,6 +829,17 @@ struct drm_event_vblank {
   __u32 reserved;
  };
  
 +struct drm_palette_caps {
 + /* Structure version. Should be 1 currently */
 + __u32 version;
 + /* For padding and future use */
 + __u32 reserved;
 + /* This may be 0 if not supported. e.g. plane palette or VLV pipe */
 + __u32 num_samples_before_ctm;
 + /* This will be non-zero for pipe. May be zero for planes on some HW */
 + __u32 num_samples_after_ctm;
 +};
this structure does not match what is documented in the design document.  are 
we missing updates to the design document?
Yes, Susanta will be updating the document in a day or two. Thanks for 
reminding us. 
 +
  /* typedef area */
  #ifndef __KERNEL__
  typedef struct drm_clip_rect drm_clip_rect_t;
 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/3] drm/atomic-helper: Add option to update planes only on active crtc

2015-07-29 Thread Daniel Vetter
With drivers supporting runtime pm it's generally not a good idea to
touch the hardware when it's off. Add an option to the commit_planes
helper to support this case.

Note that the helpers already add all planes on a crtc when a modeset
happens, hence plane updates will not be lost if drivers set this to
true.

v2: Check for NULL state->crtc before chasing the pointer. Also check
both old and new crtc if there's a switch. Finally just outright
disallow switching crtcs for a plane if the plane is in active use, on
most hardware that doesn't make sense.

v3: Since commit_planes(active_only = true) is for enabling things
only after all the crtc are on we should only look at the new crtc to
decide whether to call the plane hooks - if the current CRTC isn't on
then skip. If the old crtc (when moving a plane) went down then the
plane should have been disabled as part of the pipe shutdown work
already. For which there's currently no helper really unfortunately.
Also move the check for whether a plane gets a new CRTC assigned while
still in active use out of this patch.

Cc: Maarten Lankhorst maarten.lankho...@linux.intel.com
Cc: Thierry Reding tred...@nvidia.com
Cc: Laurent Pinchart laurent.pinchart+rene...@ideasonboard.com
Signed-off-by: Daniel Vetter daniel.vet...@intel.com
---
 drivers/gpu/drm/drm_atomic_helper.c| 20 ++--
 drivers/gpu/drm/exynos/exynos_drm_fb.c |  2 +-
 drivers/gpu/drm/msm/msm_atomic.c   |  2 +-
 drivers/gpu/drm/omapdrm/omap_drv.c |  2 +-
 drivers/gpu/drm/rcar-du/rcar_du_kms.c  |  2 +-
 drivers/gpu/drm/sti/sti_drm_drv.c  |  2 +-
 drivers/gpu/drm/tegra/drm.c|  2 +-
 include/drm/drm_atomic_helper.h|  3 ++-
 8 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
b/drivers/gpu/drm/drm_atomic_helper.c
index 0b475fae067d..6be0adb5a0e9 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1029,7 +1029,7 @@ int drm_atomic_helper_commit(struct drm_device *dev,
 
drm_atomic_helper_commit_modeset_disables(dev, state);
 
-   drm_atomic_helper_commit_planes(dev, state);
+   drm_atomic_helper_commit_planes(dev, state, false);
 
drm_atomic_helper_commit_modeset_enables(dev, state);
 
@@ -1144,10 +1144,16 @@ fail:
 }
 EXPORT_SYMBOL(drm_atomic_helper_prepare_planes);
 
+bool plane_crtc_active(struct drm_plane_state *state)
+{
+   return state-crtc  state-crtc-state-active;
+}
+
 /**
  * drm_atomic_helper_commit_planes - commit plane state
  * @dev: DRM device
  * @old_state: atomic state object with old state structures
+ * @active_only: Only commit on active CRTC if set
  *
  * This function commits the new plane state using the plane and atomic helper
  * functions for planes and crtcs. It assumes that the atomic state has already
@@ -1162,7 +1168,8 @@ EXPORT_SYMBOL(drm_atomic_helper_prepare_planes);
  * drm_atomic_helper_commit_planes_on_crtc() instead.
  */
 void drm_atomic_helper_commit_planes(struct drm_device *dev,
-struct drm_atomic_state *old_state)
+struct drm_atomic_state *old_state,
+bool active_only)
 {
struct drm_crtc *crtc;
struct drm_crtc_state *old_crtc_state;
@@ -1178,6 +1185,9 @@ void drm_atomic_helper_commit_planes(struct drm_device 
*dev,
if (!funcs || !funcs-atomic_begin)
continue;
 
+   if (active_only  !crtc-state-active)
+   continue;
+
funcs-atomic_begin(crtc, old_crtc_state);
}
 
@@ -1189,6 +1199,9 @@ void drm_atomic_helper_commit_planes(struct drm_device 
*dev,
if (!funcs)
continue;
 
+   if (active_only  !plane_crtc_active(plane-state))
+   continue;
+
/*
 * Special-case disabling the plane if drivers support it.
 */
@@ -1208,6 +1221,9 @@ void drm_atomic_helper_commit_planes(struct drm_device 
*dev,
if (!funcs || !funcs-atomic_flush)
continue;
 
+   if (active_only  !crtc-state-active)
+   continue;
+
funcs-atomic_flush(crtc, old_crtc_state);
}
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c 
b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index 2b6320e6eae2..7b383acbb5af 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -293,7 +293,7 @@ static int exynos_atomic_commit(struct drm_device *dev,
 * have the relevant clocks enabled to perform the update.
 */
 
-   drm_atomic_helper_commit_planes(dev, state);
+   drm_atomic_helper_commit_planes(dev, state, false);
 
drm_atomic_helper_cleanup_planes(dev, state);
 
diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index 

[Intel-gfx] [PATCH 3/3] drm/atomic: refuse changing CRTC for planes while active

2015-07-29 Thread Daniel Vetter
Very strictly speaking this is possible if you have special hw and
genlocked CRTCs. In general switching a plane between two active CRTC
just won't work so well and is probably not tested at all. Just forbid
it.

The exception is when both CRTC do a full modeset, then it should be
no problem at all to move the planes around (presuming a correct
implementation) so allow that case.

I've put this into the core since I really couldn't come up with a
case where we don't want to enforce that. But if that ever happens it
would be easy to move this check into helpers.

Cc: Thierry Reding thierry.red...@gmail.com
Cc: Maarten Lankhorst maarten.lankho...@linux.intel.com
Signed-off-by: Daniel Vetter daniel.vet...@intel.com
---
 drivers/gpu/drm/drm_atomic.c| 38 +
 drivers/gpu/drm/drm_atomic_helper.c |  1 +
 2 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 434915448ea0..422183e7ee7d 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -663,6 +663,38 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
return 0;
 }
 
+/* checks whether a plane has its CRTC switched while being in active use. */
+static bool
+active_plane_switching(struct drm_atomic_state *state,
+  struct drm_plane *plane,
+  struct drm_plane_state *plane_state)
+{
+   struct drm_crtc_state *crtc_state, *curr_crtc_state;
+
+   if (!plane-state-crtc || !plane_state-crtc)
+   return false;
+
+   if (plane-state-crtc == plane_state-crtc)
+   return false;
+
+   curr_crtc_state = plane-state-crtc-state;
+   if (!curr_crtc_state-active)
+   return false;
+
+   crtc_state = drm_atomic_get_existing_crtc_state(state,
+   plane_state-crtc);
+   if (!crtc_state-active)
+   return false;
+
+   /* plane switching CRTC and both CRTC are active. This is only ok if
+* both CRTC do a full modeset. */
+   if (drm_atomic_crtc_needs_modeset(curr_crtc_state) 
+   drm_atomic_crtc_needs_modeset(crtc_state))
+   return false;
+
+   return true;
+}
+
 /**
  * drm_atomic_plane_check - check plane state
  * @plane: plane to check
@@ -734,6 +766,12 @@ static int drm_atomic_plane_check(struct drm_plane *plane,
return -ENOSPC;
}
 
+   if (active_plane_switching(state-state, plane, state)) {
+   DRM_DEBUG_ATOMIC([PLANE:%d] switching active CRTC without 
modeset\n,
+plane-base.id);
+   return -EINVAL;
+   }
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
b/drivers/gpu/drm/drm_atomic_helper.c
index 6be0adb5a0e9..54c59ddc59a5 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -497,6 +497,7 @@ drm_atomic_helper_check_planes(struct drm_device *dev,
 plane-base.id);
return ret;
}
+
}
 
for_each_crtc_in_state(state, crtc, crtc_state, i) {
-- 
2.5.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Gore, Tim


Tim Gore 
Intel Corporation (UK) Ltd. - Co. Reg. #1134945 - Pipers Way, Swindon SN3 1RJ

 -Original Message-
 From: Intel-gfx [mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf
 Of Chris Wilson
 Sent: Wednesday, July 29, 2015 10:56 AM
 To: Morton, Derek J
 Cc: intel-gfx@lists.freedesktop.org; Wood, Thomas
 Subject: Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase
 min swap required
 
 On Wed, Jul 29, 2015 at 09:52:31AM +, Morton, Derek J wrote:
 
  
  
  -Original Message-
  From: Chris Wilson [mailto:ch...@chris-wilson.co.uk]
  Sent: Wednesday, July 29, 2015 9:54 AM
  To: Morton, Derek J
  Cc: intel-gfx@lists.freedesktop.org; Wood, Thomas
  Subject: Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits:
  Increase min swap required
  
  On Wed, Jul 29, 2015 at 09:30:21AM +0100, Derek Morton wrote:
   The swap-thrash subtest had a requirement that swap memory be
   present but no minimum amount was specified. The subtest allowed
   for half the total swap memory for overhead. Some android systems
   have only a small amount of swap space and half this was not
   enough resulting in OOM errors. It was not possible to determine
   the exact amount of memory the test would require in all
   configurations to guarantee swap memory would be used but not
 trigger an OOM error.
   As a minimum recommended swap partition on Linux is 256Mb the
   subtest was updated to require this.
  
   Also fixed a couple of small memory leaks.
  
   Signed-off-by: Derek Morton derek.j.mor...@intel.com
   ---
tests/gem_render_linear_blits.c | 10 +-
1 file changed, 9 insertions(+), 1 deletion(-)
  
   diff --git a/tests/gem_render_linear_blits.c
   b/tests/gem_render_linear_blits.c index f83c6d4..5dd210d 100644
   --- a/tests/gem_render_linear_blits.c
   +++ b/tests/gem_render_linear_blits.c
   @@ -184,6 +184,9 @@ static void run_test (int fd, int count)
}
intel_batchbuffer_free(batch);
drm_intel_bufmgr_destroy(bufmgr);
   +
   +free(bo);
   +free(start_val);
}
  
igt_main
   @@ -210,7 +213,12 @@ igt_main
  
igt_subtest(swap-thrash) {
uint64_t swap_mb = intel_get_total_swap_mb();
   -igt_require(swap_mb  0);
   +/* The calculation of count allows 1/2 the swap memory 
   as
   +   overhead. However on Android systems with a very 
   small
 swap
   +   partition this is not enough resulting in OOM errors.
   +   As 256Mb is a minimum recomended size for a swap
 partition
   +   on Linux, skip the subtest if less than this. */
   +igt_require(swap_mb  255);
count = ((intel_get_avail_ram_mb() + (swap_mb / 2)) *
 1024*1024) / SIZE;
intel_require_memory(count, SIZE, CHECK_RAM |
 CHECK_SWAP);
  
  Surely fixing intel_require_memory(CHECK_SWAP) (adding the slop of
 256MiB swap or somesuch) would be better?
 
  I don't think so. igt_require(swap_mb > 255) is simple and easy to
 understand. intel_require_memory() is doing a crude count+SIZE 
 ram+swap. It does not take into account any other memory the test might be
 using or allow any overhead for additional memory used elsewhere in the
 system. I tried several scenarios with the HW I have. 2Gb RAM and no swap
 (removed the +swap code) required count = ... / SIZE + 50Kb to run without
 any OOM errors (40Kb was intermittent). On a 1Gb system with 99Mb of
 Swap it would quickly OOM even though it should have an extra 50Mb (swap
 / 2) of free memory on top of what the +50Kb was giving. I do not see any
 way of accurately calculating count to be a value that guarantees swap
 memory will be used without going OOM when the amount of swap is small
 and the amount of overhead RAM used is not known accurately. Hence just
 skipping the subtest in that situation.
  In fact the intel_require_memory() as it stands is pointless and could even
 be removed as count is calculated to be a value where
 intel_require_memory() will never trigger an assert.
 
 So basically, you have a kernel bug you wish to ignore?
 -Chris
 
 --
 Chris Wilson, Intel Open Source Technology Centre

I don’t see how this implies a kernel bug. It seems like a test problem (my
subtest as it happens). I was unaware of Android systems with small swap
partitions (or indeed any swap at all). Not sure I can understand the logic of
such a tiny swap partition but given the situation, unless we can accurately
characterise the memory usage of the test in advance then we have to
either skip the test for small swap, or try to monitor memory usage in an
ongoing way during the test.

 Tim Gore
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list

Re: [Intel-gfx] [PATCH i-g-t] tests/gem_render_linear_blits: Increase min swap required

2015-07-29 Thread Chris Wilson
On Wed, Jul 29, 2015 at 01:10:23PM +, Gore, Tim wrote:
 I don’t see how this implies a kernel bug. It seems like a test problem (my
 subtest as it happens). I was unaware of Android systems with small swap
 partitions (or indeed any swap at all). Not sure I can understand the logic of
 such a tiny swap partition but given the situation, unless we can accurately
 characterise the memory usage of the test in advance then we have to
 either skip the test for small swap, or try to monitor memory usage in an
 ongoing way during the test.

If the system has enough resources to run the test (that is enough
physical to run an individual batch plus enough swap to hold the rest),
then the test must not oom.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 09/12] drm/i915: Remove connectors_active from intel_dp.c.

2015-07-29 Thread Ander Conselvan De Oliveira
On Mon, 2015-07-27 at 14:35 +0200, Maarten Lankhorst wrote:
 Now that everything's atomic, checking encoder->base.crtc is enough.

Don't you need to check encoder->base.crtc->state->active too?

 Cc: Ville Syrjälä ville.syrj...@linux.intel.com
 Signed-off-by: Maarten Lankhorst maarten.lankho...@linux.intel.com
 ---
  drivers/gpu/drm/i915/intel_dp.c | 7 ++-
  1 file changed, 2 insertions(+), 5 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
 index cea7d1785d13..dccd9a0e2526 100644
 --- a/drivers/gpu/drm/i915/intel_dp.c
 +++ b/drivers/gpu/drm/i915/intel_dp.c
 @@ -2624,7 +2624,7 @@ static void vlv_steal_power_sequencer(struct drm_device 
 *dev,
   DRM_DEBUG_KMS(stealing pipe %c power sequencer from port %c\n,
 pipe_name(pipe), port_name(port));
  
 - WARN(encoder-connectors_active,
 + WARN(encoder-base.crtc,
stealing pipe %c power sequencer from active eDP port 
 %c\n,
pipe_name(pipe), port_name(port));
  
 @@ -4240,10 +4240,7 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
  
   WARN_ON(!drm_modeset_is_locked(dev-mode_config.connection_mutex));
  
 - if (!intel_encoder-connectors_active)
 - return;
 -
 - if (WARN_ON(!intel_encoder-base.crtc))
 + if (!intel_encoder-base.crtc)
   return;
  
   if (!to_intel_crtc(intel_encoder-base.crtc)-active)
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2 08/12] drm/i915: Remove connectors_active from sanitization.

2015-07-29 Thread Ander Conselvan De Oliveira
On Mon, 2015-07-27 at 14:35 +0200, Maarten Lankhorst wrote:
 connectors_active will be removed, so just calculate this right here.
 
 Signed-off-by: Maarten Lankhorst maarten.lankho...@linux.intel.com
 ---
  drivers/gpu/drm/i915/intel_display.c | 17 ++---
  1 file changed, 14 insertions(+), 3 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/intel_display.c 
 b/drivers/gpu/drm/i915/intel_display.c
 index ed9eba2666e2..341fadb40c81 100644
 --- a/drivers/gpu/drm/i915/intel_display.c
 +++ b/drivers/gpu/drm/i915/intel_display.c
 @@ -14935,8 +14935,10 @@ static void intel_sanitize_crtc(struct intel_crtc 
 *crtc)
   /* Adjust the state of the output pipe according to whether we
* have active connectors/encoders. */
   enable = false;
 - for_each_encoder_on_crtc(dev, crtc-base, encoder)
 - enable |= encoder-connectors_active;
 + for_each_encoder_on_crtc(dev, crtc-base, encoder) {
 + enable = true;
 + break;
 + }
  
   if (!enable)
   intel_crtc_disable_noatomic(crtc-base);
 @@ -14992,6 +14994,7 @@ static void intel_sanitize_encoder(struct 
 intel_encoder *encoder)
  {
   struct intel_connector *connector;
   struct drm_device *dev = encoder-base.dev;
 + bool active = false;
  
   /* We need to check both for a crtc link (meaning that the
* encoder is active and trying to read from a pipe) and the
 @@ -14999,7 +15002,15 @@ static void intel_sanitize_encoder(struct 
 intel_encoder *encoder)
   bool has_active_crtc = encoder-base.crtc 
   to_intel_crtc(encoder-base.crtc)-active;
  
 - if (encoder-connectors_active  !has_active_crtc) {
 + for_each_intel_connector(dev, connector) {
 + if (connector-encoder != encoder)

Shouldn't this be connector->base.encoder?

Ander
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 15/19] drm/i915: batch_obj vm offset must be u64

2015-07-29 Thread Goel, Akash


Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

Otherwise it can overflow in 48-bit mode, and cause an incorrect
exec_start.

Before commit 5f19e2bffa63a91cd4ac1adcec648e14a44277ce (drm/i915: Merged
the many do_execbuf() parameters into a structure), it was already an u64.

Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_drv.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 33926d9..ed2fbcd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1674,7 +1674,7 @@ struct i915_execbuffer_params {
struct drm_file *file;
uint32_tdispatch_flags;
uint32_targs_batch_start_offset;
-   uint32_tbatch_obj_vm_offset;
+   uint64_tbatch_obj_vm_offset;
struct intel_engine_cs  *ring;
struct drm_i915_gem_object  *batch_obj;
struct intel_context*ctx;


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 16/19] drm/i915/userptr: Kill user_size limit check

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:54 PM, Michel Thierry wrote:

GTT was only 32b and its max value is 4GB. In order to allow objects
bigger than 4GB in 48b PPGTT, i915_gem_userptr_ioctl we could check
against max 48b range (1ULL << 48).

But since the check no longer applies, just kill the limit.

v2: Use the default ctx to infer the ppgtt max size (Akash).
v3: Just kill the limit, it was only there for early detection of an
error when used for execbuffer (Chris).

Cc: Akash Goel akash.g...@intel.com
Reviewed-by: Chris Wilson ch...@chris-wilson.co.uk
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_gem_userptr.c | 4 
  1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 8fd431b..d11901d 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -813,7 +813,6 @@ static const struct drm_i915_gem_object_ops 
i915_gem_userptr_ops = {
  int
  i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file 
*file)
  {
-   struct drm_i915_private *dev_priv = dev-dev_private;
struct drm_i915_gem_userptr *args = data;
struct drm_i915_gem_object *obj;
int ret;
@@ -826,9 +825,6 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file
if (offset_in_page(args-user_ptr | args-user_size))
return -EINVAL;

-   if (args-user_size  dev_priv-gtt.base.total)
-   return -E2BIG;
-
if (!access_ok(args-flags  I915_USERPTR_READ_ONLY ? VERIFY_READ : 
VERIFY_WRITE,
   (char __user *)(unsigned long)args-user_ptr, 
args-user_size))
return -EFAULT;


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 12/19] drm/i915: Expand error state's address width to 64b

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

v2: For semaphore errors, object is mapped to GGTT and offset will not
be > 4GB, print only lower 32-bits (Akash)
v3: Print gtt_offset in groups of 32-bit (Chris)

Cc: Akash Goel akash.g...@intel.com
Cc: Chris Wilson ch...@chris-wilson.co.uk
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_drv.h   |  4 ++--
  drivers/gpu/drm/i915/i915_gpu_error.c | 24 ++--
  2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0b5cbe8..33926d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -546,7 +546,7 @@ struct drm_i915_error_state {

struct drm_i915_error_object {
int page_count;
-   u32 gtt_offset;
+   u64 gtt_offset;
u32 *pages[0];
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;

@@ -572,7 +572,7 @@ struct drm_i915_error_state {
u32 size;
u32 name;
u32 rseqno[I915_NUM_RINGS], wseqno;
-   u32 gtt_offset;
+   u64 gtt_offset;
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6f42569..f79c952 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -197,8 +197,9 @@ static void print_error_buffers(struct 
drm_i915_error_state_buf *m,
err_printf(m,   %s [%d]:\n, name, count);

while (count--) {
-   err_printf(m, %08x %8u %02x %02x [ ,
-  err-gtt_offset,
+   err_printf(m, %08x_%08x %8u %02x %02x [ ,
+  upper_32_bits(err-gtt_offset),
+  lower_32_bits(err-gtt_offset),
   err-size,
   err-read_domains,
   err-write_domain);
@@ -426,15 +427,17 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
err_printf(m,  (submitted by %s [%d]),
   error-ring[i].comm,
   error-ring[i].pid);
-   err_printf(m,  --- gtt_offset = 0x%08x\n,
-  obj-gtt_offset);
+   err_printf(m,  --- gtt_offset = 0x%08x %08x\n,
+  upper_32_bits(obj-gtt_offset),
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}

obj = error-ring[i].wa_batchbuffer;
if (obj) {
err_printf(m, %s (w/a) --- gtt_offset = 0x%08x\n,
-  dev_priv-ring[i].name, obj-gtt_offset);
+  dev_priv-ring[i].name,
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}

@@ -453,14 +456,14 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
if ((obj = error-ring[i].ringbuffer)) {
err_printf(m, %s --- ringbuffer = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}

if ((obj = error-ring[i].hws_page)) {
err_printf(m, %s --- HW Status = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
offset = 0;
for (elt = 0; elt  PAGE_SIZE/16; elt += 4) {
err_printf(m, [%04x] %08x %08x %08x %08x\n,
@@ -476,13 +479,14 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
if ((obj = error-ring[i].ctx)) {
err_printf(m, %s --- HW Context = 0x%08x\n,
   dev_priv-ring[i].name,
-  obj-gtt_offset);
+  lower_32_bits(obj-gtt_offset));
print_error_obj(m, obj);
}
}

if ((obj = error-semaphore_obj)) {
-   err_printf(m, Semaphore page = 0x%08x\n, obj-gtt_offset);
+   err_printf(m, Semaphore page = 0x%08x\n,
+  

Re: [Intel-gfx] [PATCH v6 13/19] drm/i915/gen8: Add ppgtt info and debug_dump

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

v2: Clean up patch after rebases.
v3: gen8_dump_ppgtt for 32b and 48b PPGTT.
v4: Use used_pml4es/pdpes (Akash).
v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
v6: Rely on used_px bits instead of null checking (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
  drivers/gpu/drm/i915/i915_debugfs.c | 18 
  drivers/gpu/drm/i915/i915_gem_gtt.c | 84 +
  2 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 23a69307..b6f1a13 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2250,7 +2250,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct 
drm_device *dev)
  {
struct drm_i915_private *dev_priv = dev-dev_private;
struct intel_engine_cs *ring;
-   struct drm_file *file;
int i;

if (INTEL_INFO(dev)-gen == 6)
@@ -2273,13 +2272,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct 
drm_device *dev)
ppgtt-debug_dump(ppgtt, m);
}

-   list_for_each_entry_reverse(file, dev-filelist, lhead) {
-   struct drm_i915_file_private *file_priv = file-driver_priv;
-
-   seq_printf(m, proc: %s\n,
-  get_pid_task(file-pid, PIDTYPE_PID)-comm);
-   idr_for_each(file_priv-context_idr, per_file_ctx, m);
-   }
seq_printf(m, ECOCHK: 0x%08x\n, I915_READ(GAM_ECOCHK));
  }

@@ -2288,6 +2280,7 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
struct drm_info_node *node = m-private;
struct drm_device *dev = node-minor-dev;
struct drm_i915_private *dev_priv = dev-dev_private;
+   struct drm_file *file;

int ret = mutex_lock_interruptible(dev-struct_mutex);
if (ret)
@@ -2299,6 +2292,15 @@ static int i915_ppgtt_info(struct seq_file *m, void 
*data)
else if (INTEL_INFO(dev)-gen = 6)
gen6_ppgtt_info(m, dev);

+   list_for_each_entry_reverse(file, dev-filelist, lhead) {
+   struct drm_i915_file_private *file_priv = file-driver_priv;
+
+   seq_printf(m, \nproc: %s\n,
+  get_pid_task(file-pid, PIDTYPE_PID)-comm);
+   idr_for_each(file_priv-context_idr, per_file_ctx,
+(void *)(unsigned long)m);
+   }
+
intel_runtime_pm_put(dev_priv);
mutex_unlock(dev-struct_mutex);

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 73cfe56..0d7c7c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1361,6 +1361,89 @@ static int gen8_alloc_va_range(struct i915_address_space 
*vm,
return gen8_alloc_va_range_3lvl(vm, ppgtt-pdp, start, length);
  }

+static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
+ uint64_t start, uint64_t length,
+ gen8_pte_t scratch_pte,
+ struct seq_file *m)
+{
+   struct i915_page_directory *pd;
+   uint64_t temp;
+   uint32_t pdpe;
+
+   gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+   struct i915_page_table *pt;
+   uint64_t pd_len = length;
+   uint64_t pd_start = start;
+   uint32_t pde;
+
+   if (!test_bit(pdpe, pdp-used_pdpes))
+   continue;
+
+   seq_printf(m, \tPDPE #%d\n, pdpe);
+   gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+   uint32_t  pte;
+   gen8_pte_t *pt_vaddr;
+
+   if (!test_bit(pde, pd-used_pdes))
+   continue;
+
+   pt_vaddr = kmap_px(pt);
+   for (pte = 0; pte  GEN8_PTES; pte += 4) {
+   uint64_t va =
+   (pdpe  GEN8_PDPE_SHIFT) |
+   (pde  GEN8_PDE_SHIFT) |
+   (pte  GEN8_PTE_SHIFT);
+   int i;
+   bool found = false;
+
+   for (i = 0; i  4; i++)
+   if (pt_vaddr[pte + i] != scratch_pte)
+   found = true;
+   if (!found)
+   continue;
+
+   seq_printf(m, \t\t0x%llx [%03d,%03d,%04d]: =, 
va, pdpe, pde, pte);
+   for (i = 0; i  4; i++) {
+   if (pt_vaddr[pte + i] 

Re: [Intel-gfx] [PATCH v6 14/19] drm/i915: object size needs to be u64

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:53 PM, Michel Thierry wrote:

In a 48b world, users can try to allocate buffers bigger than 4GB; in
these cases it is important that size is a 64b variable.

v2: Drop the warning about bind with size 0, it shouldn't happen anyway.
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_gem.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5d68578..80f5d97 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3348,7 +3348,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
  {
struct drm_device *dev = obj-base.dev;
struct drm_i915_private *dev_priv = dev-dev_private;
-   u32 size, fence_size, fence_alignment, unfenced_alignment;
+   u32 fence_alignment, unfenced_alignment;
+   u64 size, fence_size;
u64 start =
flags  PIN_OFFSET_BIAS ? flags  PIN_OFFSET_MASK : 0;
u64 end =
@@ -3407,7 +3408,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
 * attempt to find space.
 */
if (size  end) {
-   DRM_DEBUG(Attempting to bind an object (view type=%u) larger than the 
aperture: size=%u  %s aperture=%llu\n,
+   DRM_DEBUG(Attempting to bind an object (view type=%u) larger than the 
aperture: size=%llu  %s aperture=%llu\n,
  ggtt_view ? ggtt_view-type : 0,
  size,
  flags  PIN_MAPPABLE ? mappable : total,


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 17/19] drm/i915: Wa32bitGeneralStateOffset Wa32bitInstructionBaseOffset

2015-07-29 Thread Goel, Akash

Reviewed the patch & it looks fine.
Reviewed-by: Akash Goel akash.g...@intel.com

On 7/29/2015 9:54 PM, Michel Thierry wrote:

There are some allocations that must be only referenced by 32-bit
offsets. To limit the chances of having the first 4GB already full,
objects not requiring this workaround use DRM_MM_SEARCH_BELOW/
DRM_MM_CREATE_TOP flags

In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
General State Heap (GSH) or Instruction State Heap (ISH) must be in a
32-bit range, because the General State Offset and Instruction State
Offset are limited to 32-bits.

Objects must have EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag to indicate if
they can be allocated above the 32-bit address range. To limit the
chances of having the first 4GB already full, objects will use
DRM_MM_SEARCH_BELOW + DRM_MM_CREATE_TOP flags when possible.

v2: Changed flag logic from needs_32b, to supports_48b.
v3: Moved 48-bit support flag back to exec_object. (Chris, Daniel)
v4: Split pin flags into PIN_ZONE_4G and PIN_HIGH; update PIN_OFFSET_MASK
to use last PIN_ defined instead of hard-coded value; use correct limit
check in eb_vma_misplaced. (Chris)
v5: Don't touch PIN_OFFSET_MASK and update workaround comment (Chris)
v6: Apply pin-high for ggtt too (Chris)
v7: Handle simultaneous pin-high and pin-mappable end correctly (Akash)
 Fix check for entries currently using +4GB addresses, use min_t and
 other polish in object_bind_to_vm (Chris)

Cc: Chris Wilson ch...@chris-wilson.co.uk
Cc: Akash Goel akash.g...@intel.com
Reviewed-by: Chris Wilson ch...@chris-wilson.co.uk (v4)
Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_drv.h|  2 ++
  drivers/gpu/drm/i915/i915_gem.c| 25 +++--
  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +
  include/uapi/drm/i915_drm.h|  3 ++-
  4 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ed2fbcd..c344805 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2775,6 +2775,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
  #define PIN_OFFSET_BIAS   (13)
  #define PIN_USER  (14)
  #define PIN_UPDATE(15)
+#define PIN_ZONE_4G(16)
+#define PIN_HIGH   (17)
  #define PIN_OFFSET_MASK (~4095)
  int __must_check
  i915_gem_object_pin(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 80f5d97..e1ca63f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3349,11 +3349,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
struct drm_device *dev = obj-base.dev;
struct drm_i915_private *dev_priv = dev-dev_private;
u32 fence_alignment, unfenced_alignment;
+   u32 search_flag, alloc_flag;
+   u64 start, end;
u64 size, fence_size;
-   u64 start =
-   flags  PIN_OFFSET_BIAS ? flags  PIN_OFFSET_MASK : 0;
-   u64 end =
-   flags  PIN_MAPPABLE ? dev_priv-gtt.mappable_end : vm-total;
struct i915_vma *vma;
int ret;

@@ -3393,6 +3391,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
size = flags  PIN_MAPPABLE ? fence_size : obj-base.size;
}

+   start = flags  PIN_OFFSET_BIAS ? flags  PIN_OFFSET_MASK : 0;
+   end = vm-total;
+   if (flags  PIN_MAPPABLE)
+   end = min_t(u64, end, dev_priv-gtt.mappable_end);
+   if (flags  PIN_ZONE_4G)
+   end = min_t(u64, end, (1ULL  32));
+
if (alignment == 0)
alignment = flags  PIN_MAPPABLE ? fence_alignment :
unfenced_alignment;
@@ -3428,13 +3433,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
if (IS_ERR(vma))
goto err_unpin;

+   if (flags  PIN_HIGH) {
+   search_flag = DRM_MM_SEARCH_BELOW;
+   alloc_flag = DRM_MM_CREATE_TOP;
+   } else {
+   search_flag = DRM_MM_SEARCH_DEFAULT;
+   alloc_flag = DRM_MM_CREATE_DEFAULT;
+   }
+
  search_free:
ret = drm_mm_insert_node_in_range_generic(vm-mm, vma-node,
  size, alignment,
  obj-cache_level,
  start, end,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
+ search_flag,
+ alloc_flag);
if (ret) {
ret = i915_gem_evict_something(dev, vm, size, alignment,
   obj-cache_level,
diff --git 

Re: [Intel-gfx] [PATCH v6 18/19] drm/i915/gen8: Flip the 48b switch

2015-07-29 Thread Goel, Akash



On 7/29/2015 9:54 PM, Michel Thierry wrote:

Use 48b addresses if hw supports it (i915.enable_ppgtt=3).

Note, aliasing PPGTT remains 32b only.

Signed-off-by: Michel Thierry michel.thie...@intel.com
---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 5 ++---
  drivers/gpu/drm/i915/i915_params.c  | 2 +-
  2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0d7c7c1..a7d3c07 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -108,8 +108,7 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)

has_aliasing_ppgtt = INTEL_INFO(dev)-gen = 6;
has_full_ppgtt = INTEL_INFO(dev)-gen = 7;
-   has_full_64bit_ppgtt = (IS_BROADWELL(dev) ||
-   INTEL_INFO(dev)-gen = 9)  false; /* FIXME: 
64b */
+   has_full_64bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)-gen = 9;

if (intel_vgpu_active(dev))
has_full_ppgtt = false; /* emulation is too hard */
@@ -147,7 +146,7 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)
}

if (INTEL_INFO(dev)-gen = 8  i915.enable_execlists)
-   return 2;
+   return has_full_64bit_ppgtt ? 3 : 2;
else
return has_aliasing_ppgtt ? 1 : 0;
  }
diff --git a/drivers/gpu/drm/i915/i915_params.c 
b/drivers/gpu/drm/i915/i915_params.c
index 5ae4b0a..d961440 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -111,7 +111,7 @@ MODULE_PARM_DESC(enable_hangcheck,
  module_param_named_unsafe(enable_ppgtt, i915.enable_ppgtt, int, 0400);
  MODULE_PARM_DESC(enable_ppgtt,
Override PPGTT usage. 
-   (-1=auto [default], 0=disabled, 1=aliasing, 2=full));
+   (-1=auto [default], 0=disabled, 1=aliasing, 2=full, 3=full_64b));

Sorry for the late comment.
Would it be better to use '_48b' here & above, instead of '_64b', to be 
precise?

Actually in other patches also, 48 bit has been used.

Best regards
Akash



  module_param_named(enable_execlists, i915.enable_execlists, int, 0400);
  MODULE_PARM_DESC(enable_execlists,


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] i915 driver crashes on T540p if docking station attached

2015-07-29 Thread Theodore Ts'o

Unfortunately the failure causes a series of recursive faults and I
haven't been able to capture the stack trace, but on 4.2-rcX kernels,
I can reliably cause the system to crash if my T540p is booted with
the docking station attached.

It will also crash if I boot the system first, and then insert the
laptop into the dockstation.

Unfortunately, I can't get a stack trace because there are a huge
number of recursive/double faults, and the system dies so quickly that
nothing ends up in the log files.  If you really need a stack dump I
can try to rig something, but modern Laptops don't have serial
consoles any more, alas, so it's a bit of a pain.

I was able to bisect it down to this commit, however: 8c7b5ccb72987:
drm/i915: Use atomic helpers for computing changed flags:

Is there any chance Intel could add a Lenovo Dockstation with a
Multistream DP output to part of your test hardware?  Unfortunately it
seems pretty common that I see regressions with my particular
hardware.  Maybe there aren't enough people using Thinkpads any more?  :-(

  - Ted


P.S.  The git bisect log

git bisect start
# bad: [421d125c06c4be4c5005cb69840206bd09b71dd6] builddeb: sign the modules 
after splitting out the debuginfo files
git bisect bad 421d125c06c4be4c5005cb69840206bd09b71dd6
# good: [b953c0d234bc72e8489d3bf51a276c5c4ec85345] Linux 4.1
git bisect good b953c0d234bc72e8489d3bf51a276c5c4ec85345
# good: [aeaa2122af4e53f3bfd28e8f294557bb95af43fc] drm/i915/skl: Add the INIT 
power domain to the MISC I/O power well
git bisect good aeaa2122af4e53f3bfd28e8f294557bb95af43fc
# bad: [4d70f38a760ad2879d2ebd84001c92980180f630] drm/i915/bios: remove a 
redundant NULL pointer check
git bisect bad 4d70f38a760ad2879d2ebd84001c92980180f630
# bad: [27a1b688d9f1fa2abd14bfe6a8729a19fb3b1b25] drm/i915/bxt: Enable 
WaEnableYV12BugFixInHalfSliceChicken7 for Broxton
git bisect bad 27a1b688d9f1fa2abd14bfe6a8729a19fb3b1b25
# good: [4be0731786de10d0e9ae1d159504c83c6b052647] drm/i915: Add crtc states 
before calling compute_config()
git bisect good 4be0731786de10d0e9ae1d159504c83c6b052647
# good: [d5432a9d19b61ba6a2b3d88f3026e0ca60eb57a1] drm/i915: Stage new modeset 
state straight into atomic state
git bisect good d5432a9d19b61ba6a2b3d88f3026e0ca60eb57a1
# bad: [a821fc46bc7bb6d4cf9a5f8d2787fd70231c2c10] drm/i915: Swap atomic state 
in legacy modeset
git bisect bad a821fc46bc7bb6d4cf9a5f8d2787fd70231c2c10
# bad: [8c7b5ccb729870e606321b3703e2c2e698c49a95] drm/i915: Use atomic helpers 
for computing changed flags
git bisect bad 8c7b5ccb729870e606321b3703e2c2e698c49a95
# good: [0f63cca2afdc38877e86acfa9821020f6e2213fd] drm/i915: Update crtc state 
active flag based on DPMS
git bisect good 0f63cca2afdc38877e86acfa9821020f6e2213fd
# good: [840bfe953384a134c8639f2964d9b74bfa671e16] drm/atomic: Make 
mode_fixup() optional for check_modeset()
git bisect good 840bfe953384a134c8639f2964d9b74bfa671e16
# first bad commit: [8c7b5ccb729870e606321b3703e2c2e698c49a95] drm/i915: Use 
atomic helpers for computing changed flags

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [REGRESSION] Re: i915 driver crashes on T540p if docking station attached

2015-07-29 Thread Linus Torvalds
On Wed, Jul 29, 2015 at 6:39 PM, Theodore Ts'o ty...@mit.edu wrote:

 It's here:  https://goo.gl/photos/xHjn2Z97JQEw6k2C9

You didn't catch enough of the code line to decode the code, but it's
early enough in drm_crtc_index() (just five bytes in) that it's almost
certainly the very first dereference, so it's almost guaranteed to be
that

   crtc->dev

access as part of list_for_each_entry(), with crtc being NULL. And
yes, ->dev is the very first field, so the offset is zero too (while
the ->mode_config list access would not be at offset zero).

And it looks like it is called from drm_atomic_helper_check_modeset():
the reason it has a question mark in the backtrace is because the
fault happens before the stack frame has even been set up.

There are multiple calls to drm_crtc_index() from that function, I
can't tell which one it is. Looking at the code generation I get, I
think it's because update_connector_routing() gets inlined, and that
one does several calls. Most of them look like this:

    if (connector->state->crtc) {
        idx = drm_crtc_index(connector->state->crtc);

ie they check that the crtc is non-NULL, but that last one does not:

    connector_state->best_encoder = new_encoder;
    idx = drm_crtc_index(connector_state->crtc);

    crtc_state = state->crtc_states[idx];
    crtc_state->mode_changed = true;

and I suspect the fix might be something like the attached. Totally
untested. Ted?

This whole atomic modeset series has been one royal fuck-up, guys.
We've had too many of these kinds of crap issues.

   Linus
 drivers/gpu/drm/drm_atomic_helper.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
b/drivers/gpu/drm/drm_atomic_helper.c
index 5b59d5ad7d1c..aac212297b49 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -230,10 +230,12 @@ update_connector_routing(struct drm_atomic_state *state, 
int conn_idx)
}
 
connector_state-best_encoder = new_encoder;
-   idx = drm_crtc_index(connector_state-crtc);
+   if (connector_state-crtc) {
+   idx = drm_crtc_index(connector_state-crtc);
 
-   crtc_state = state-crtc_states[idx];
-   crtc_state-mode_changed = true;
+   crtc_state = state-crtc_states[idx];
+   crtc_state-mode_changed = true;
+   }
 
DRM_DEBUG_ATOMIC([CONNECTOR:%d:%s] using [ENCODER:%d:%s] on 
[CRTC:%d]\n,
 connector-base.id,
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v5 07/19] drm/i915/gen8: Add 4 level switching infrastructure and lrc support

2015-07-29 Thread Michel Thierry

On 7/16/2015 10:33 AM, Michel Thierry wrote:

In 64b (48bit canonical) PPGTT addressing, the PDP0 register contains
the base address to PML4, while the other PDP registers are ignored.

In LRC, the addressing mode must be specified in every context
descriptor, and the base address to PML4 is stored in the reg state.

v2: PML4 update in legacy context switch is left for historic reasons,
the preferred mode of operation is with lrc context based submission.
v3: s/gen8_map_page_directory/gen8_setup_page_directory and
s/gen8_map_page_directory_pointer/gen8_setup_page_directory_pointer.
Also, clflush will be needed for bxt. (Akash)
v4: Squashed lrc-specific code and use a macro to set PML4 register.
v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
PDP update in bb_start is only for legacy 32b mode.
v6: Rebase after final merged version of Mika's ppgtt/scratch
patches.
v7: There is no need to update the pml4 register value in
execlists_update_context (Akash)

Cc: Akash Goel akash.g...@intel.com
Signed-off-by: Ben Widawsky b...@bwidawsk.net
Signed-off-by: Michel Thierry michel.thie...@intel.com (v2+)
---
  drivers/gpu/drm/i915/i915_gem_gtt.c | 54 +
  drivers/gpu/drm/i915/i915_gem_gtt.h |  2 ++
  drivers/gpu/drm/i915/i915_reg.h |  1 +
  drivers/gpu/drm/i915/intel_lrc.c| 60 ++---
  4 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5901810..8bcd328 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -210,6 +210,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
 return pde;
  }

+#define gen8_pdpe_encode gen8_pde_encode
+#define gen8_pml4e_encode gen8_pde_encode
+
  static gen6_pte_t snb_pte_encode(dma_addr_t addr,
  enum i915_cache_level level,
  bool valid, u32 unused)
@@ -599,6 +602,35 @@ static void free_pdp(struct drm_device *dev,
 }
  }

+static void
+gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
+ struct i915_page_directory_pointer *pdp,
+ struct i915_page_directory *pd,
+ int index)
+{
+   gen8_ppgtt_pdpe_t *page_directorypo;
+
+   if (!USES_FULL_48BIT_PPGTT(ppgtt-base.dev))
+   return;
+
+   page_directorypo = kmap_px(pdp);
+   page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+   kunmap_px(ppgtt, page_directorypo);
+}
+
+static void
+gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
+ struct i915_pml4 *pml4,
+ struct i915_page_directory_pointer *pdp,
+ int index)
+{
+   gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
+
+   WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt-base.dev));
+   pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+   kunmap_px(ppgtt, pagemap);
+}
+
  /* Broadwell Page Directory Pointer Descriptors */
  static int gen8_write_pdp(struct drm_i915_gem_request *req,
   unsigned entry,


These _setup_ functions don't belong to this patch, and should be moved 
to the previous one in the patchset (drm/i915/gen8: implement 
alloc/free for 4lvl).

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


  1   2   >