Re: [PATCH 1/2] drm/ttm: Fix cached TTM page allocation.

2010-05-26 Thread Jerome Glisse
On Wed, May 26, 2010 at 04:18:49PM +0200, Thomas Hellstrom wrote:
 This patch fixes a regression introduced with the pool page allocator
 in the event that there are no highmem pages (for example x86_64),
 in which case cached page allocation would fail.
 
 Tested with the vmwgfx driver on a 64-bit vm.
 
 Signed-off-by: Thomas Hellstrom thellst...@vmware.com
Reviewed-by: Jerome Glisse jgli...@redhat.com

 ---
  drivers/gpu/drm/ttm/ttm_page_alloc.c |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)
 
 diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
 b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 index 0d9a42c..b6d1523 100644
 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
 +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 @@ -671,7 +671,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
   if (flags  TTM_PAGE_FLAG_DMA32)
   gfp_flags |= GFP_DMA32;
   else
 - gfp_flags |= __GFP_HIGHMEM;
 + gfp_flags |= GFP_HIGHUSER;
  
   for (r = 0; r  count; ++r) {
   p = alloc_page(gfp_flags);
 -- 
 1.6.2.5
 
 
 --
 
 --
 ___
 Dri-devel mailing list
 Dri-devel@lists.sourceforge.net
 https://lists.sourceforge.net/lists/listinfo/dri-devel

--

--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 2/2] drm/ttm: Fix ttm_page_alloc.c

2010-05-26 Thread Jerome Glisse
On Wed, May 26, 2010 at 04:18:50PM +0200, Thomas Hellstrom wrote:
 Fix a number of typos, misspellings and checkpatch.pl warnings.
 Replace "[ttm] " with TTM_PFX
 
 Signed-off-by: Thomas Hellstrom thellst...@vmware.com
Reviewed-by: Jerome Glisse jgli...@redhat.com

 ---
  drivers/gpu/drm/ttm/ttm_page_alloc.c |   62 +++--
  1 files changed, 36 insertions(+), 26 deletions(-)
 
 diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
 b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 index b6d1523..ef91069 100644
 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
 +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 @@ -77,7 +77,7 @@ struct ttm_page_pool {
  /**
   * Limits for the pool. They are handled without locks because only place 
 where
   * they may change is in sysfs store. They won't have immediate effect anyway
 - * so forcing serialiazation to access them is pointless.
 + * so forcing serialization to access them is pointless.
   */
  
  struct ttm_pool_opts {
 @@ -165,16 +165,18 @@ static ssize_t ttm_pool_store(struct kobject *kobj,
   m-options.small = val;
   else if (attr == ttm_page_pool_alloc_size) {
   if (val  NUM_PAGES_TO_ALLOC*8) {
 - printk(KERN_ERR [ttm] Setting allocation size to %lu 
 - is not allowed. Recomended size is 
 - %lu\n,
 - NUM_PAGES_TO_ALLOC*(PAGE_SIZE  7),
 - NUM_PAGES_TO_ALLOC*(PAGE_SIZE  10));
 + printk(KERN_ERR TTM_PFX
 +Setting allocation size to %lu 
 +is not allowed. Recommended size is 
 +%lu\n,
 +NUM_PAGES_TO_ALLOC*(PAGE_SIZE  7),
 +NUM_PAGES_TO_ALLOC*(PAGE_SIZE  10));
   return size;
   } else if (val  NUM_PAGES_TO_ALLOC) {
 - printk(KERN_WARNING [ttm] Setting allocation size to 
 - larger than %lu is not recomended.\n,
 - NUM_PAGES_TO_ALLOC*(PAGE_SIZE  10));
 + printk(KERN_WARNING TTM_PFX
 +Setting allocation size to 
 +larger than %lu is not recommended.\n,
 +NUM_PAGES_TO_ALLOC*(PAGE_SIZE  10));
   }
   m-options.alloc_size = val;
   }
 @@ -277,7 +279,7 @@ static void ttm_pages_put(struct page *pages[], unsigned 
 npages)
  {
   unsigned i;
   if (set_pages_array_wb(pages, npages))
 - printk(KERN_ERR [ttm] Failed to set %d pages to wb!\n,
 + printk(KERN_ERR TTM_PFX Failed to set %d pages to wb!\n,
   npages);
   for (i = 0; i  npages; ++i)
   __free_page(pages[i]);
 @@ -313,7 +315,8 @@ static int ttm_page_pool_free(struct ttm_page_pool *pool, 
 unsigned nr_free)
   pages_to_free = kmalloc(npages_to_free * sizeof(struct page *),
   GFP_KERNEL);
   if (!pages_to_free) {
 - printk(KERN_ERR Failed to allocate memory for pool free 
 operation.\n);
 + printk(KERN_ERR TTM_PFX
 +Failed to allocate memory for pool free operation.\n);
   return 0;
   }
  
 @@ -390,7 +393,7 @@ static int ttm_pool_get_num_unused_pages(void)
  }
  
  /**
 - * Calback for mm to request pool to reduce number of page held.
 + * Callback for mm to request pool to reduce number of page held.
   */
  static int ttm_pool_mm_shrink(int shrink_pages, gfp_t gfp_mask)
  {
 @@ -433,14 +436,16 @@ static int ttm_set_pages_caching(struct page **pages,
   case tt_uncached:
   r = set_pages_array_uc(pages, cpages);
   if (r)
 - printk(KERN_ERR [ttm] Failed to set %d pages to uc!\n,
 - cpages);
 + printk(KERN_ERR TTM_PFX
 +Failed to set %d pages to uc!\n,
 +cpages);
   break;
   case tt_wc:
   r = set_pages_array_wc(pages, cpages);
   if (r)
 - printk(KERN_ERR [ttm] Failed to set %d pages to wc!\n,
 - cpages);
 + printk(KERN_ERR TTM_PFX
 +Failed to set %d pages to wc!\n,
 +cpages);
   break;
   default:
   break;
 @@ -458,7 +463,7 @@ static void ttm_handle_caching_state_failure(struct 
 list_head *pages,
   struct page **failed_pages, unsigned cpages)
  {
   unsigned i;
 - /* Failed pages has to be reed */
 + /* Failed pages have to be freed */
   for (i = 0; i  cpages; ++i) {
   list_del(failed_pages[i]-lru);
   __free_page(failed_pages[i

Re: 2.6.33.2 kmalloc-8 slab leaks ~512 objects per second

2010-05-12 Thread Jerome Glisse
On Wed, May 12, 2010 at 11:50:41AM +0100, Tvrtko Ursulin wrote:
 On Wednesday 12 May 2010 11:38:05 Tvrtko Ursulin wrote:
 
 [snip]
  I think it is DRM (radeon) related, leak stopped when I closed all X
   programs. I am compiling 2.6.33.3 right now and will soon reboot into it.
 
 Leak is still present in 2.6.33.3 - the more GUI activity the more it leaks.
 So the ~512 objects per second was with xosview running, but moving windows or
 browsing through menus can leak a lot more (per second).
 
 Is version of userspace drm library interesting? It is 2.4.20
 (libdrm-2.4.20-12.1.x86_64).
 
 Tvrtko
 

Does the number of objects stay constant over a longer period of time
(30 min, 1 hour)? If there was a leak I think we would have noticed
it by now; some of my desktops have been running radeon KMS 24h a day
for several days.

Cheers,
Jerome

--

--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: 2.6.34-rc5: Reported regressions from 2.6.33

2010-04-21 Thread Jerome Glisse
On Wed, Apr 21, 2010 at 07:15:38AM +0200, Rafael J. Wysocki wrote:
 On Tuesday 20 April 2010, Nick Bowler wrote:
  On 05:15 Tue 20 Apr , Rafael J. Wysocki wrote:
   If you know of any other unresolved regressions from 2.6.33, please let us
   know either and we'll add them to the list.  Also, please let us know
   if any of the entries below are invalid.
  
  Please list these two similar regressions from 2.6.33 in the r600 DRM:
  
   * r600 CS checker rejects GL_DEPTH_TEST w/o depth buffer:
 https://bugs.freedesktop.org/show_bug.cgi?id=27571
  
   * r600 CS checker rejects narrow FBO renderbuffers:
 https://bugs.freedesktop.org/show_bug.cgi?id=27609
 
 Do you want to me to add them as one entry or as two separate bugs?
 
 Rafael
 

The first one is a userspace bug; I need to look into the second one.
I.e. we were lucky the hw didn't lock up without a depth buffer while
depth test was enabled.

Cheers,
Jerome

--
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH] drm/radeon/kms: print GPU family and device id when loading

2010-04-12 Thread Jerome Glisse
This will help in figuring out the GPU when looking at bug logs.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_device.c |   53 +++-
 drivers/gpu/drm/radeon/radeon_family.h |3 +-
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index c34f682..f33dc79 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -35,6 +35,54 @@
 #include radeon.h
 #include atom.h
 
+static const char radeon_family_name[][16] = {
+   R100,
+   RV100,
+   RS100,
+   RV200,
+   RS200,
+   R200,
+   RV250,
+   RS300,
+   RV280,
+   R300,
+   R350,
+   RV350,
+   RV380,
+   R420,
+   R423,
+   RV410,
+   RS400,
+   RS480,
+   RS600,
+   RS690,
+   RS740,
+   RV515,
+   R520,
+   RV530,
+   RV560,
+   RV570,
+   R580,
+   R600,
+   RV610,
+   RV630,
+   RV670,
+   RV620,
+   RV635,
+   RS780,
+   RS880,
+   RV770,
+   RV730,
+   RV710,
+   RV740,
+   CEDAR,
+   REDWOOD,
+   JUNIPER,
+   CYPRESS,
+   HEMLOCK,
+   LAST,
+};
+
 /*
  * Clear GPU surface registers.
  */
@@ -525,7 +573,6 @@ int radeon_device_init(struct radeon_device *rdev,
int r;
int dma_bits;
 
-   DRM_INFO(radeon: Initializing kernel modesetting.\n);
rdev-shutdown = false;
rdev-dev = pdev-dev;
rdev-ddev = ddev;
@@ -537,6 +584,10 @@ int radeon_device_init(struct radeon_device *rdev,
rdev-mc.gtt_size = radeon_gart_size * 1024 * 1024;
rdev-gpu_lockup = false;
rdev-accel_working = false;
+
+   DRM_INFO(initializing kernel modesetting (%s 0x%04X:0x%04X).\n,
+   radeon_family_name[rdev-family], pdev-vendor, pdev-device);
+
/* mutex initialization are all done here so we
 * can recall function without having locking issues */
mutex_init(rdev-cs_mutex);
diff --git a/drivers/gpu/drm/radeon/radeon_family.h 
b/drivers/gpu/drm/radeon/radeon_family.h
index 93c7d5d..e329066 100644
--- a/drivers/gpu/drm/radeon/radeon_family.h
+++ b/drivers/gpu/drm/radeon/radeon_family.h
@@ -36,7 +36,7 @@
  * Radeon chip families
  */
 enum radeon_family {
-   CHIP_R100,
+   CHIP_R100 = 0,
CHIP_RV100,
CHIP_RS100,
CHIP_RV200,
@@ -99,4 +99,5 @@ enum radeon_chip_flags {
RADEON_IS_PCI = 0x0080UL,
RADEON_IS_IGPGART = 0x0100UL,
 };
+
 #endif
-- 
1.7.0.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 03/13] drm/nouveau: update to TTM no_wait splitted argument

2010-04-09 Thread Jerome Glisse
This patch updates nouveau to the new split no_wait argument
TTM functionality.

Compile tested only (but things should run fine as there is no
operational change from the driver's point of view)

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  |   45 ++--
 drivers/gpu/drm/nouveau/nouveau_gem.c |2 +-
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0266124..5a167de 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -219,7 +219,7 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype)
for (i = 0; i  nvbo-placement.num_placement; i++)
nvbo-placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-   ret = ttm_bo_validate(bo, nvbo-placement, false, false);
+   ret = ttm_bo_validate(bo, nvbo-placement, false, false, false);
if (ret == 0) {
switch (bo-mem.mem_type) {
case TTM_PL_VRAM:
@@ -256,7 +256,7 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo)
for (i = 0; i  nvbo-placement.num_placement; i++)
nvbo-placements[i] = ~TTM_PL_FLAG_NO_EVICT;
 
-   ret = ttm_bo_validate(bo, nvbo-placement, false, false);
+   ret = ttm_bo_validate(bo, nvbo-placement, false, false, false);
if (ret == 0) {
switch (bo-mem.mem_type) {
case TTM_PL_VRAM:
@@ -456,7 +456,8 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct 
ttm_placement *pl)
 
 static int
 nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
- struct nouveau_bo *nvbo, bool evict, bool no_wait,
+ struct nouveau_bo *nvbo, bool evict,
+ bool no_wait_reserve, bool no_wait_gpu,
  struct ttm_mem_reg *new_mem)
 {
struct nouveau_fence *fence = NULL;
@@ -467,7 +468,7 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
return ret;
 
ret = ttm_bo_move_accel_cleanup(nvbo-bo, fence, NULL,
-   evict, no_wait, new_mem);
+   evict, no_wait_reserve, no_wait_gpu, 
new_mem);
if (nvbo-channel  nvbo-channel != chan)
ret = nouveau_fence_wait(fence, NULL, false, false);
nouveau_fence_unref((void *)fence);
@@ -491,7 +492,8 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct 
nouveau_channel *chan,
 
 static int
 nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-int no_wait, struct ttm_mem_reg *new_mem)
+bool no_wait_reserve, bool no_wait_gpu,
+struct ttm_mem_reg *new_mem)
 {
struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo-bdev);
@@ -569,12 +571,13 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int 
evict, bool intr,
dst_offset += (PAGE_SIZE * line_count);
}
 
-   return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, no_wait, 
new_mem);
+   return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, 
no_wait_reserve, no_wait_gpu, new_mem);
 }
 
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
 {
u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
struct ttm_placement placement;
@@ -587,7 +590,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
 
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
-   ret = ttm_bo_mem_space(bo, placement, tmp_mem, intr, no_wait);
+   ret = ttm_bo_mem_space(bo, placement, tmp_mem, intr, no_wait_reserve, 
no_wait_gpu);
if (ret)
return ret;
 
@@ -595,11 +598,11 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
if (ret)
goto out;
 
-   ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, tmp_mem);
+   ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, 
no_wait_gpu, tmp_mem);
if (ret)
goto out;
 
-   ret = ttm_bo_move_ttm(bo, evict, no_wait, new_mem);
+   ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
 out:
if (tmp_mem.mm_node) {
spin_lock(bo-bdev-glob-lru_lock);
@@ -612,7 +615,8 @@ out:
 
 static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
 {
u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING

[PATCH 01/13] drm/ttm: split no_wait argument in 2 GPU or reserve wait

2010-04-09 Thread Jerome Glisse
There are cases where we want to be able to wait only for the
GPU while not waiting for other buffers to be unreserved. This
patch splits the no_wait argument all the way down the whole
ttm path so that upper levels can decide what to wait on or
not.

This patch breaks the API to other modules; updates to the other
drivers follow in separate patches.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |   57 
 drivers/gpu/drm/ttm/ttm_bo_util.c |9 --
 include/drm/ttm/ttm_bo_api.h  |6 ++-
 include/drm/ttm/ttm_bo_driver.h   |   29 +++---
 4 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index dd47b2a..40631e2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -357,7 +357,8 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
 
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
  struct ttm_mem_reg *mem,
- bool evict, bool interruptible, bool no_wait)
+ bool evict, bool interruptible,
+ bool no_wait_reserve, bool no_wait_gpu)
 {
struct ttm_bo_device *bdev = bo-bdev;
bool old_is_pci = ttm_mem_reg_is_pci(bdev, bo-mem);
@@ -402,12 +403,12 @@ static int ttm_bo_handle_move_mem(struct 
ttm_buffer_object *bo,
 
if (!(old_man-flags  TTM_MEMTYPE_FLAG_FIXED) 
!(new_man-flags  TTM_MEMTYPE_FLAG_FIXED))
-   ret = ttm_bo_move_ttm(bo, evict, no_wait, mem);
+   ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, 
mem);
else if (bdev-driver-move)
ret = bdev-driver-move(bo, evict, interruptible,
-no_wait, mem);
+no_wait_reserve, no_wait_gpu, mem);
else
-   ret = ttm_bo_move_memcpy(bo, evict, no_wait, mem);
+   ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, 
no_wait_gpu, mem);
 
if (ret)
goto out_err;
@@ -606,7 +607,7 @@ void ttm_bo_unref(struct ttm_buffer_object **p_bo)
 EXPORT_SYMBOL(ttm_bo_unref);
 
 static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
-   bool no_wait)
+   bool no_wait_reserve, bool no_wait_gpu)
 {
struct ttm_bo_device *bdev = bo-bdev;
struct ttm_bo_global *glob = bo-glob;
@@ -615,7 +616,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
int ret = 0;
 
spin_lock(bo-lock);
-   ret = ttm_bo_wait(bo, false, interruptible, no_wait);
+   ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
spin_unlock(bo-lock);
 
if (unlikely(ret != 0)) {
@@ -638,7 +639,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
placement.num_busy_placement = 0;
bdev-driver-evict_flags(bo, placement);
ret = ttm_bo_mem_space(bo, placement, evict_mem, interruptible,
-   no_wait);
+   no_wait_reserve, no_wait_gpu);
if (ret) {
if (ret != -ERESTARTSYS) {
printk(KERN_ERR TTM_PFX
@@ -650,7 +651,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
}
 
ret = ttm_bo_handle_move_mem(bo, evict_mem, true, interruptible,
-no_wait);
+no_wait_reserve, no_wait_gpu);
if (ret) {
if (ret != -ERESTARTSYS)
printk(KERN_ERR TTM_PFX Buffer eviction failed\n);
@@ -670,7 +671,8 @@ out:
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
uint32_t mem_type,
-   bool interruptible, bool no_wait)
+   bool interruptible, bool no_wait_reserve,
+   bool no_wait_gpu)
 {
struct ttm_bo_global *glob = bdev-glob;
struct ttm_mem_type_manager *man = bdev-man[mem_type];
@@ -687,11 +689,11 @@ retry:
bo = list_first_entry(man-lru, struct ttm_buffer_object, lru);
kref_get(bo-list_kref);
 
-   ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+   ret = ttm_bo_reserve_locked(bo, false, no_wait_reserve, false, 0);
 
if (unlikely(ret == -EBUSY)) {
spin_unlock(glob-lru_lock);
-   if (likely(!no_wait))
+   if (likely(!no_wait_gpu))
ret = ttm_bo_wait_unreserved(bo, interruptible);
 
kref_put(bo-list_kref, ttm_bo_release_list);
@@ -713,7 +715,7 @@ retry:
while (put_count--)
kref_put(bo-list_kref, ttm_bo_ref_bug);
 
-   ret = ttm_bo_evict(bo

[PATCH 04/13] drm/vmwgfx: update to TTM no_wait splitted argument

2010-04-09 Thread Jerome Glisse
This patch updates vmwgfx to the new split no_wait argument
TTM functionality.

Compile tested only (but things should run fine as there is no
operational change from the driver's point of view)

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c |4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c  |4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c |2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 0897359..dbd36b8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -570,7 +570,7 @@ static int vmw_validate_single_buffer(struct vmw_private 
*dev_priv,
 * Put BO in VRAM, only if there is space.
 */
 
-   ret = ttm_bo_validate(bo, vmw_vram_sys_placement, true, false);
+   ret = ttm_bo_validate(bo, vmw_vram_sys_placement, true, false, false);
if (unlikely(ret == -ERESTARTSYS))
return ret;
 
@@ -590,7 +590,7 @@ static int vmw_validate_single_buffer(struct vmw_private 
*dev_priv,
 * previous contents.
 */
 
-   ret = ttm_bo_validate(bo, vmw_vram_placement, true, false);
+   ret = ttm_bo_validate(bo, vmw_vram_placement, true, false, false);
return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index a933670..80125ff 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -628,7 +628,7 @@ int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
if (unlikely(ret != 0))
return ret;
 
-   ret = ttm_bo_validate(bo, vmw_sys_placement, false, false);
+   ret = ttm_bo_validate(bo, vmw_sys_placement, false, false, false);
ttm_bo_unreserve(bo);
 
return ret;
@@ -652,7 +652,7 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private 
*vmw_priv,
if (unlikely(ret != 0))
goto err_unlock;
 
-   ret = ttm_bo_validate(bo, ne_placement, false, false);
+   ret = ttm_bo_validate(bo, ne_placement, false, false, false);
ttm_bo_unreserve(bo);
 err_unlock:
ttm_write_unlock(vmw_priv-active_master-lock);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 5b6eabe..ad566c8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -118,7 +118,7 @@ static int vmw_dmabuf_pin_in_vram(struct vmw_private 
*dev_priv,
if (pin)
overlay_placement = vmw_vram_ne_placement;
 
-   ret = ttm_bo_validate(bo, overlay_placement, interruptible, false);
+   ret = ttm_bo_validate(bo, overlay_placement, interruptible, false, 
false);
 
ttm_bo_unreserve(bo);
 
-- 
1.7.0.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 09/13] drm/radeon/kms: don't initialize TTM io memory manager field

2010-04-09 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_ttm.c |   13 +
 1 files changed, 1 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 91333ec..d51c1c4 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -162,21 +162,13 @@ static int radeon_init_mem_type(struct ttm_bo_device 
*bdev, uint32_t type,
  (unsigned)type);
return -EINVAL;
}
-   man-io_offset = rdev-mc.agp_base;
-   man-io_size = rdev-mc.gtt_size;
-   man-io_addr = NULL;
if (!rdev-ddev-agp-cant_use_aperture)
man-flags = TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-   } else
-#endif
-   {
-   man-io_offset = 0;
-   man-io_size = 0;
-   man-io_addr = NULL;
}
+#endif
break;
case TTM_PL_VRAM:
/* On-card video ram */
@@ -185,9 +177,6 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
 TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-   man-io_addr = NULL;
-   man-io_offset = rdev-mc.aper_base;
-   man-io_size = rdev-mc.aper_size;
break;
default:
DRM_ERROR(Unsupported memory type %u\n, (unsigned)type);
-- 
1.7.0.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


unmappable vram v8

2010-04-09 Thread Jerome Glisse
Ok so here again this time ioremap is kept inside ttm so we don't
waste ioremap when faulting page.

Cheers,
Jerome


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 02/13] drm/radeon/kms: update to TTM no_wait splitted argument

2010-04-09 Thread Jerome Glisse
This patch updates radeon to the new split no_wait argument
TTM functionality.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_object.c |6 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c|   39 +--
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index dc7e3f4..4b441f8 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -191,7 +191,7 @@ int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 
*gpu_addr)
}
for (i = 0; i  bo-placement.num_placement; i++)
bo-placements[i] |= TTM_PL_FLAG_NO_EVICT;
-   r = ttm_bo_validate(bo-tbo, bo-placement, false, false);
+   r = ttm_bo_validate(bo-tbo, bo-placement, false, false, false);
if (likely(r == 0)) {
bo-pin_count = 1;
if (gpu_addr != NULL)
@@ -215,7 +215,7 @@ int radeon_bo_unpin(struct radeon_bo *bo)
return 0;
for (i = 0; i  bo-placement.num_placement; i++)
bo-placements[i] = ~TTM_PL_FLAG_NO_EVICT;
-   r = ttm_bo_validate(bo-tbo, bo-placement, false, false);
+   r = ttm_bo_validate(bo-tbo, bo-placement, false, false, false);
if (unlikely(r != 0))
dev_err(bo-rdev-dev, %p validate failed for unpin\n, bo);
return r;
@@ -330,7 +330,7 @@ int radeon_bo_list_validate(struct list_head *head)
lobj-rdomain);
}
r = ttm_bo_validate(bo-tbo, bo-placement,
-   true, false);
+   true, false, false);
if (unlikely(r))
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index fc787e8..992dfc8 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -244,9 +244,9 @@ static void radeon_move_null(struct ttm_buffer_object *bo,
 }
 
 static int radeon_move_blit(struct ttm_buffer_object *bo,
-   bool evict, int no_wait,
-   struct ttm_mem_reg *new_mem,
-   struct ttm_mem_reg *old_mem)
+   bool evict, int no_wait_reserve, bool no_wait_gpu,
+   struct ttm_mem_reg *new_mem,
+   struct ttm_mem_reg *old_mem)
 {
struct radeon_device *rdev;
uint64_t old_start, new_start;
@@ -290,13 +290,14 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
r = radeon_copy(rdev, old_start, new_start, new_mem-num_pages, fence);
/* FIXME: handle copy error */
r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
- evict, no_wait, new_mem);
+ evict, no_wait_reserve, no_wait_gpu, 
new_mem);
radeon_fence_unref(fence);
return r;
 }
 
 static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible, bool no_wait,
+   bool evict, bool interruptible,
+   bool no_wait_reserve, bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
 {
struct radeon_device *rdev;
@@ -317,7 +318,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
placement.busy_placement = placements;
placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, placement, tmp_mem,
-interruptible, no_wait);
+interruptible, no_wait_reserve, no_wait_gpu);
if (unlikely(r)) {
return r;
}
@@ -331,11 +332,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
if (unlikely(r)) {
goto out_cleanup;
}
-   r = radeon_move_blit(bo, true, no_wait, tmp_mem, old_mem);
+   r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, tmp_mem, 
old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
-   r = ttm_bo_move_ttm(bo, true, no_wait, new_mem);
+   r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
 out_cleanup:
if (tmp_mem.mm_node) {
struct ttm_bo_global *glob = rdev-mman.bdev.glob;
@@ -349,7 +350,8 @@ out_cleanup:
 }
 
 static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible, bool no_wait,
+   bool evict, bool interruptible,
+   bool no_wait_reserve, bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
 {
struct

[PATCH 11/13] drm/vmwgfx: don't initialize TTM io memory manager field

2010-04-09 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |6 --
 1 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index f355896..c4f5114 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -137,9 +137,6 @@ int vmw_invalidate_caches(struct ttm_bo_device *bdev, 
uint32_t flags)
 int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
  struct ttm_mem_type_manager *man)
 {
-   struct vmw_private *dev_priv =
-   container_of(bdev, struct vmw_private, bdev);
-
switch (type) {
case TTM_PL_SYSTEM:
/* System memory */
@@ -151,10 +148,7 @@ int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t 
type,
case TTM_PL_VRAM:
/* On-card video ram */
man-gpu_offset = 0;
-   man-io_offset = dev_priv-vram_start;
-   man-io_size = dev_priv-vram_size;
man-flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
-   man-io_addr = NULL;
man-available_caching = TTM_PL_MASK_CACHING;
man-default_caching = TTM_PL_FLAG_WC;
break;
-- 
1.7.0.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 05/13] drm/ttm: ttm_fault callback to allow driver to handle bo placement V6

2010-04-09 Thread Jerome Glisse
On fault the driver is given the opportunity to perform any operation
it sees fit in order to place the buffer into a CPU visible area of
memory. This patch doesn't break TTM users, nouveau, vmwgfx and radeon
should keep working properly. Future patch will take advantage of this
infrastructure and remove the old path from TTM once driver are
converted.

V2 return VM_FAULT_NOPAGE if the callback returns -EBUSY or -ERESTARTSYS
V3 balance io_mem_reserve and io_mem_free calls; fault_reserve_notify
   is responsible for performing any task necessary for the mapping to succeed
V4 minor cleanup, atomic_t -> bool as the member is protected by the reserve
   mechanism from concurrent access
V5 the callback is now responsible for iomapping the bo and providing
   a virtual address; this simplifies TTM and will allow getting rid of
   TTM_MEMTYPE_FLAG_NEEDS_IOREMAP
V6 use the bus addr data to decide whether to ioremap; we don't
   necessarily need to ioremap in the callback, and we still
   allow the driver to use a static mapping

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |7 ++-
 drivers/gpu/drm/ttm/ttm_bo_util.c |  124 ++--
 drivers/gpu/drm/ttm/ttm_bo_vm.c   |   41 ++--
 include/drm/ttm/ttm_bo_api.h  |   23 +++
 include/drm/ttm/ttm_bo_driver.h   |   16 +-
 5 files changed, 126 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 40631e2..b42e3fa 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -632,6 +632,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
 
evict_mem = bo-mem;
evict_mem.mm_node = NULL;
+   evict_mem.bus.io_reserved = false;
 
placement.fpfn = 0;
placement.lpfn = 0;
@@ -1005,6 +1006,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
mem.num_pages = bo-num_pages;
mem.size = mem.num_pages  PAGE_SHIFT;
mem.page_alignment = bo-mem.page_alignment;
+   mem.bus.io_reserved = false;
/*
 * Determine where to move the buffer.
 */
@@ -1160,6 +1162,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
bo-mem.num_pages = bo-num_pages;
bo-mem.mm_node = NULL;
bo-mem.page_alignment = page_alignment;
+   bo-mem.bus.io_reserved = false;
bo-buffer_start = buffer_start  PAGE_MASK;
bo-priv_flags = 0;
bo-mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
@@ -1574,7 +1577,7 @@ int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
if (ttm_mem_reg_is_pci(bdev, mem)) {
*bus_offset = mem-mm_node-start  PAGE_SHIFT;
*bus_size = mem-num_pages  PAGE_SHIFT;
-   *bus_base = man-io_offset;
+   *bus_base = man-io_offset + (uintptr_t)man-io_addr;
}
 
return 0;
@@ -1588,8 +1591,8 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 
if (!bdev-dev_mapping)
return;
-
unmap_mapping_range(bdev-dev_mapping, offset, holelen, 1);
+   ttm_mem_io_free(bdev, bo-mem);
 }
 EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 865b2a8..d58eeb5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -81,30 +81,62 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_move_ttm);
 
+int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   int ret;
+
+   if (bdev-driver-io_mem_reserve) {
+   if (!mem-bus.io_reserved) {
+   mem-bus.io_reserved = true;
+   ret = bdev-driver-io_mem_reserve(bdev, mem);
+   if (unlikely(ret != 0))
+   return ret;
+   }
+   } else {
+   ret = ttm_bo_pci_offset(bdev, mem, mem-bus.base, 
mem-bus.offset, mem-bus.size);
+   if (unlikely(ret != 0))
+   return ret;
+   mem-bus.addr = NULL;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP))
+   mem-bus.addr = (void *)(((u8 *)man-io_addr) + 
mem-bus.offset);
+   mem-bus.is_iomem = (mem-bus.size  0) ? 1 : 0;
+   }
+   return 0;
+}
+
+void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   if (bdev-driver-io_mem_reserve) {
+   if (mem-bus.io_reserved) {
+   mem-bus.io_reserved = false;
+   bdev-driver-io_mem_free(bdev, mem);
+   }
+   }
+}
+
 int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
void **virtual)
 {
-   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
-   unsigned long bus_offset;
-   unsigned long

[PATCH 13/13] drm/radeon/kms: enable use of unmappable VRAM V2

2010-04-09 Thread Jerome Glisse
This patch enables the use of unmappable VRAM thanks to the
previous TTM infrastructure changes.

V2 update after io_mem_reserve/io_mem_free callback balancing

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/evergreen.c |5 -
 drivers/gpu/drm/radeon/r100.c  |5 -
 drivers/gpu/drm/radeon/r600.c  |5 -
 drivers/gpu/drm/radeon/rv770.c |5 -
 4 files changed, 0 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 7672f11..48e0dfb 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -475,11 +475,6 @@ int evergreen_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
radeon_update_bandwidth_info(rdev);
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7a4a4fc..f53dd52 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2035,11 +2035,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
else
rdev-mc.mc_vram_size = rdev-mc.real_vram_size;
}
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
 }
 
 void r100_vga_set_state(struct radeon_device *rdev, bool state)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 13c9cc3..fa3c1fe 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -713,11 +713,6 @@ int r600_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
 
if (rdev-flags  RADEON_IS_IGP)
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 2b8a4e1..599121b 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -905,11 +905,6 @@ int rv770_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
radeon_update_bandwidth_info(rdev);
 
-- 
1.7.0.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 06/13] drm/radeon/kms: add support for new fault callback V7

2010-04-09 Thread Jerome Glisse
This adds support for the new fault callback and also the
infrastructure for supporting unmappable VRAM.

V2 validate BO with no_wait = true
V3 don't derefence bo-mem.mm_node as it's not NULL only for
   VRAM or GTT
V4 update to splitted no_wait ttm change
V5 update to new balanced io_mem_reserve/free change
V6 callback is responsible for iomapping memory
V7 move back iomapping to ttm

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_object.c |   26 +++-
 drivers/gpu/drm/radeon/radeon_object.h |2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c|   51 +--
 3 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index 4b441f8..57b3f95 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -498,11 +498,33 @@ void radeon_bo_move_notify(struct ttm_buffer_object *bo,
radeon_bo_check_tiling(rbo, 0, 1);
 }
 
-void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
+   struct radeon_device *rdev;
struct radeon_bo *rbo;
+   unsigned long offset, size;
+   int r;
+
if (!radeon_ttm_bo_is_radeon_bo(bo))
-   return;
+   return 0;
rbo = container_of(bo, struct radeon_bo, tbo);
radeon_bo_check_tiling(rbo, 0, 0);
+   rdev = rbo-rdev;
+   if (bo-mem.mem_type == TTM_PL_VRAM) {
+   size = bo-mem.num_pages  PAGE_SHIFT;
+   offset = bo-mem.mm_node-start  PAGE_SHIFT;
+   if ((offset + size)  rdev-mc.visible_vram_size) {
+   /* hurrah the memory is not visible ! */
+   radeon_ttm_placement_from_domain(rbo, 
RADEON_GEM_DOMAIN_VRAM);
+   rbo-placement.lpfn = rdev-mc.visible_vram_size  
PAGE_SHIFT;
+   r = ttm_bo_validate(bo, rbo-placement, false, true, 
false);
+   if (unlikely(r != 0))
+   return r;
+   offset = bo-mem.mm_node-start  PAGE_SHIFT;
+   /* this should not happen */
+   if ((offset + size)  rdev-mc.visible_vram_size)
+   return -EINVAL;
+   }
+   }
+   return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_object.h 
b/drivers/gpu/drm/radeon/radeon_object.h
index 7ab43de..353998d 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -168,6 +168,6 @@ extern int radeon_bo_check_tiling(struct radeon_bo *bo, 
bool has_moved,
bool force_drop);
 extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem);
-extern void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
+extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 992dfc8..91333ec 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -166,8 +166,7 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
man-io_size = rdev-mc.gtt_size;
man-io_addr = NULL;
if (!rdev-ddev-agp-cant_use_aperture)
-   man-flags = TTM_MEMTYPE_FLAG_NEEDS_IOREMAP |
-TTM_MEMTYPE_FLAG_MAPPABLE;
+   man-flags = TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
@@ -183,7 +182,6 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
/* On-card video ram */
man-gpu_offset = rdev-mc.vram_start;
man-flags = TTM_MEMTYPE_FLAG_FIXED |
-TTM_MEMTYPE_FLAG_NEEDS_IOREMAP |
 TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
@@ -438,10 +436,53 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 memcpy:
r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, 
new_mem);
}
-
return r;
 }
 
+static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct radeon_device *rdev = radeon_get_rdev(bdev);
+
+   mem-bus.addr = NULL;
+   mem-bus.offset = 0

[PATCH 07/13] drm/nouveau/kms: add support for new TTM fault callback V5

2010-04-09 Thread Jerome Glisse
This adds support for the new fault callback; it doesn't change anything
from the driver's point of view, though it should allow nouveau to add
support for unmappable VRAM.

Improvement: store the aperture base in a variable so that we don't
call a function to get it on each fault.

Patch hasn't been tested on any hw.

V2 don't derefence bo-mem.mm_node as it's not NULL only for
   VRAM or GTT
V3 update after io_mem_reserve/io_mem_free callback balancing
V4 callback has to ioremap
V5 ioremap is done by ttm

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |   58 +++--
 1 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 5a167de..288c2ec 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -386,8 +386,7 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
break;
case TTM_PL_VRAM:
man-flags = TTM_MEMTYPE_FLAG_FIXED |
-TTM_MEMTYPE_FLAG_MAPPABLE |
-TTM_MEMTYPE_FLAG_NEEDS_IOREMAP;
+TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
@@ -403,8 +402,7 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
case TTM_PL_TT:
switch (dev_priv-gart_info.type) {
case NOUVEAU_GART_AGP:
-   man-flags = TTM_MEMTYPE_FLAG_MAPPABLE |
-TTM_MEMTYPE_FLAG_NEEDS_IOREMAP;
+   man-flags = TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED;
man-default_caching = TTM_PL_FLAG_UNCACHED;
break;
@@ -761,6 +759,55 @@ nouveau_bo_verify_access(struct ttm_buffer_object *bo, 
struct file *filp)
return 0;
 }
 
+static int
+nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
+   struct drm_device *dev = dev_priv-dev;
+
+   mem-bus.addr = NULL;
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   mem-bus.is_iomem = false;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* System memory */
+   return 0;
+   case TTM_PL_TT:
+#if __OS_HAS_AGP
+   if (dev_priv-gart_info.type == NOUVEAU_GART_AGP) {
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = dev_priv-gart_info.aper_base;
+   mem-bus.is_iomem = true;
+   }
+#endif
+   break;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = drm_get_resource_start(dev, 1);
+   mem-bus.is_iomem = true;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void
+nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+}
+
+static int
+nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+   return 0;
+}
+
 struct ttm_bo_driver nouveau_bo_driver = {
.create_ttm_backend_entry = nouveau_bo_create_ttm_backend_entry,
.invalidate_caches = nouveau_bo_invalidate_caches,
@@ -773,5 +820,8 @@ struct ttm_bo_driver nouveau_bo_driver = {
.sync_obj_flush = nouveau_fence_flush,
.sync_obj_unref = nouveau_fence_unref,
.sync_obj_ref = nouveau_fence_ref,
+   .fault_reserve_notify = nouveau_ttm_fault_reserve_notify,
+   .io_mem_reserve = nouveau_ttm_io_mem_reserve,
+   .io_mem_free = nouveau_ttm_io_mem_free,
 };
 
-- 
1.7.0.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 08/13] drm/vmwgfx: add support for new TTM fault callback V5

2010-04-09 Thread Jerome Glisse
This adds support for the new fault callback; it doesn't change anything
from the driver's point of view.

Improvement: store the aperture base in a variable so that we don't
call a function to get it on each fault.

Patch hasn't been tested.

V2 don't derefence bo-mem.mm_node as it's not NULL only for
   VRAM or GTT
V3 update after io_mem_reserve/io_mem_free callback balancing
V4 callback has to ioremap
V5 ioremap is done by TTM

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |   44 +--
 1 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 825ebe3..f355896 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -153,8 +153,7 @@ int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t 
type,
man-gpu_offset = 0;
man-io_offset = dev_priv-vram_start;
man-io_size = dev_priv-vram_size;
-   man-flags = TTM_MEMTYPE_FLAG_FIXED |
-   TTM_MEMTYPE_FLAG_NEEDS_IOREMAP | TTM_MEMTYPE_FLAG_MAPPABLE;
+   man-flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
man-io_addr = NULL;
man-available_caching = TTM_PL_MASK_CACHING;
man-default_caching = TTM_PL_FLAG_WC;
@@ -193,6 +192,42 @@ static void vmw_swap_notify(struct ttm_buffer_object *bo)
vmw_dmabuf_gmr_unbind(bo);
 }
 
+static int vmw_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct vmw_private *dev_priv = container_of(bdev, struct vmw_private, 
bdev);
+
+   mem-bus.addr = NULL;
+   mem-bus.is_iomem = false;
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* System memory */
+   return 0;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = dev_priv-vram_start;
+   mem-bus.is_iomem = true;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void vmw_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg 
*mem)
+{
+}
+
+static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+   return 0;
+}
+
 /**
  * FIXME: We're using the old vmware polling method to sync.
  * Do this with fences instead.
@@ -248,5 +283,8 @@ struct ttm_bo_driver vmw_bo_driver = {
.sync_obj_unref = vmw_sync_obj_unref,
.sync_obj_ref = vmw_sync_obj_ref,
.move_notify = vmw_move_notify,
-   .swap_notify = vmw_swap_notify
+   .swap_notify = vmw_swap_notify,
+   .fault_reserve_notify = vmw_ttm_fault_reserve_notify,
+   .io_mem_reserve = vmw_ttm_io_mem_reserve,
+   .io_mem_free = vmw_ttm_io_mem_free,
 };
-- 
1.7.0.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 10/13] drm/nouveau/kms: don't initialize TTM io memory manager field

2010-04-09 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |   11 ---
 1 files changed, 0 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 288c2ec..34be192 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -390,13 +390,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-
-   man-io_addr = NULL;
-   man-io_offset = drm_get_resource_start(dev, 1);
-   man-io_size = drm_get_resource_len(dev, 1);
-   if (man-io_size  nouveau_mem_fb_amount(dev))
-   man-io_size = nouveau_mem_fb_amount(dev);
-
man-gpu_offset = dev_priv-vm_vram_base;
break;
case TTM_PL_TT:
@@ -417,10 +410,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
 dev_priv-gart_info.type);
return -EINVAL;
}
-
-   man-io_offset  = dev_priv-gart_info.aper_base;
-   man-io_size= dev_priv-gart_info.aper_size;
-   man-io_addr   = NULL;
man-gpu_offset = dev_priv-vm_gart_base;
break;
default:
-- 
1.7.0.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 12/13] drm/ttm: remove io_ field from TTM V6

2010-04-09 Thread Jerome Glisse
All TTM drivers have been converted to the new io_mem_reserve/free
interface, which allows a driver to choose and return the proper io
base and offset to core TTM for ioremapping if necessary. This
patch removes what is now dead code.

V2 adapt to match with change in first patch of the patchset
V3 update after io_mem_reserve/io_mem_free callback balancing
V4 adjust to minor cleanup
V5 remove the needs ioremap flag
V6 keep the ioremapping facility in TTM
---
 drivers/gpu/drm/ttm/ttm_bo.c  |   22 --
 drivers/gpu/drm/ttm/ttm_bo_util.c |   19 ---
 include/drm/ttm/ttm_bo_driver.h   |   12 
 3 files changed, 4 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b42e3fa..3b5b094 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -79,8 +79,6 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, 
int mem_type)
printk(KERN_ERR TTM_PFX use_type: %d\n, man-use_type);
printk(KERN_ERR TTM_PFX flags: 0x%08X\n, man-flags);
printk(KERN_ERR TTM_PFX gpu_offset: 0x%08lX\n, man-gpu_offset);
-   printk(KERN_ERR TTM_PFX io_offset: 0x%08lX\n, man-io_offset);
-   printk(KERN_ERR TTM_PFX io_size: %ld\n, man-io_size);
printk(KERN_ERR TTM_PFX size: %llu\n, man-size);
printk(KERN_ERR TTM_PFX available_caching: 0x%08X\n,
man-available_caching);
@@ -1563,26 +1561,6 @@ bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, 
struct ttm_mem_reg *mem)
return true;
 }
 
-int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
- struct ttm_mem_reg *mem,
- unsigned long *bus_base,
- unsigned long *bus_offset, unsigned long *bus_size)
-{
-   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
-
-   *bus_size = 0;
-   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
-   return -EINVAL;
-
-   if (ttm_mem_reg_is_pci(bdev, mem)) {
-   *bus_offset = mem-mm_node-start  PAGE_SHIFT;
-   *bus_size = mem-num_pages  PAGE_SHIFT;
-   *bus_base = man-io_offset + (uintptr_t)man-io_addr;
-   }
-
-   return 0;
-}
-
 void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 {
struct ttm_bo_device *bdev = bo-bdev;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index d58eeb5..333b401 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -83,24 +83,13 @@ EXPORT_SYMBOL(ttm_bo_move_ttm);
 
 int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
-   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
int ret;
 
-   if (bdev-driver-io_mem_reserve) {
-   if (!mem-bus.io_reserved) {
-   mem-bus.io_reserved = true;
-   ret = bdev-driver-io_mem_reserve(bdev, mem);
-   if (unlikely(ret != 0))
-   return ret;
-   }
-   } else {
-   ret = ttm_bo_pci_offset(bdev, mem, mem-bus.base, 
mem-bus.offset, mem-bus.size);
+   if (!mem-bus.io_reserved) {
+   mem-bus.io_reserved = true;
+   ret = bdev-driver-io_mem_reserve(bdev, mem);
if (unlikely(ret != 0))
return ret;
-   mem-bus.addr = NULL;
-   if (!(man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP))
-   mem-bus.addr = (void *)(((u8 *)man-io_addr) + 
mem-bus.offset);
-   mem-bus.is_iomem = (mem-bus.size  0) ? 1 : 0;
}
return 0;
 }
@@ -149,7 +138,7 @@ void ttm_mem_reg_iounmap(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem,
 
man = bdev-man[mem-mem_type];
 
-   if (virtual  (man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP || 
mem-bus.addr == NULL))
+   if (virtual  mem-bus.addr == NULL)
iounmap(virtual);
ttm_mem_io_free(bdev, mem);
 }
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index da39865..7720b17 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -176,8 +176,6 @@ struct ttm_tt {
 
 #define TTM_MEMTYPE_FLAG_FIXED (1  0)/* Fixed (on-card) PCI 
memory */
 #define TTM_MEMTYPE_FLAG_MAPPABLE  (1  1)/* Memory mappable */
-#define TTM_MEMTYPE_FLAG_NEEDS_IOREMAP (1  2)/* Fixed memory needs 
ioremap
-  before kernel access. */
 #define TTM_MEMTYPE_FLAG_CMA   (1  3)/* Can't map aperture */
 
 /**
@@ -189,13 +187,6 @@ struct ttm_tt {
  * managed by this memory type.
  * @gpu_offset: If used, the GPU offset of the first managed page of
  * fixed memory or the first managed location in an aperture.
- * @io_offset: The io_offset of the first managed page of IO memory or
- * the first managed location in an 

radeondb capturing replaying GPU command stream

2010-04-08 Thread Jerome Glisse
Hi all,

So I pushed a new dump facility into libdrm; it will dump
everything needed to replay a command stream. You just need
to set CS_BOF_DUMP to 1, but be aware that any application
that you launch will then create a new file for each cs
it sends, and a file can be several MB or maybe even GB if you
try dumping a program with a huge number of big textures.

The dump file format is inefficient: it dumps everything
and doesn't try to take advantage of buffers being the same
from one cs to the next. I wasted a couple of weeks trying
to be clever and I don't think it's worth the trouble; it
only makes things more complex and more buggy. So I am
now quite happy with the current design; after all, it's
intended only to be used as a debug helper while writing a
driver or trying to fix a driver bug.

Of course this would be pretty useless without a way to
replay those files, so I hacked up a quick tool, radeondb,
which I wish to grow into a Swiss-army knife for radeon debugging.
Some of its function needs to be run outside X with KMS
enabled and as root (replaying cs needs that).

So to replay a command stream simply do :
radeondb -p mycsfile.bof
You shall see the content of the rendered buffer before
the cs get executed, press a key and you shall see the
content of the rendered buffer once the cs is executed.
I was able to replay openarena cs so i am pretty confident
that it should work well. The program also check that
you are replaying a command stream on a valid gpu ie
don't try to replay cs of r3xx hw on r5xx hw.

I intend to do an X friendly version of the tools at
one point. In the meantime i will be adding r5xx,r3xx
support over the weekend and latter r1xx,r2xx (most
of what is needed is autogenerated file see r600_states.h).

I am also working on adding the same dump format to
kernel to capture cs responsible of GPU lockup.

Idea/comments are welcome

Cheers,
Jerome

radeondb should show up at:
http://cgit.freedesktop.org/~glisse/radeondb

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: radeondb capturing replaying GPU command stream

2010-04-08 Thread Jerome Glisse
On Thu, Apr 08, 2010 at 11:09:33PM +0300, Pauli Nieminen wrote:
 On Thu, Apr 8, 2010 at 7:10 PM, Jerome Glisse gli...@freedesktop.org wrote:
  Hi all,
 
  So i pushed a new dump facility in libdrm, it will dump
  everything needed to replay a command stream you just need
  to set CS_BOF_DUMP to 1 but be aware that any application
  that you launch will than create a new file for each cs
  it sends and file can be several M or maybe even G if you
  try dumping a program with hugue number of big texture.
 
  The dump file format is inefficiant and dump everythings
  and don't try to take advantages of buffer being the same
  from one cs to the other but i wasted a couple of week trying
  to be clever and i don't think it's worth the trouble it
  only makes things more complex and more buggy. So i am
  now quite happy with the current design after all it's
  intended only to be use as a debug helper while writting
  driver or trying to fix a driver bug.
 
 
 How about some zlib compression? That should reduce the size of dump
 quite nicely when there is a lot of duplicate data.

It could save space, but it could become too CPU intensive;
I really don't think that the size should be that much of an
issue at capture time. We can always compress the file later
for sending.

Cheers,
Jerome

  Of course this would be pretty useless without a way to
  replay those file so i did hackup a quick tools radeondb
  that i wish to grow to swiss knife for radeon debugging.
  Some of its function needs to be run outside X with KMS
  enabled and as root (replaying cs needs that).
 
  So to replay a command stream simply do :
  radeondb -p mycsfile.bof
  You shall see the content of the rendered buffer before
  the cs get executed, press a key and you shall see the
  content of the rendered buffer once the cs is executed.
  I was able to replay openarena cs so i am pretty confident
  that it should work well. The program also check that
  you are replaying a command stream on a valid gpu ie
  don't try to replay cs of r3xx hw on r5xx hw.
 
  I intend to do an X friendly version of the tools at
  one point. In the meantime i will be adding r5xx,r3xx
  support over the weekend and latter r1xx,r2xx (most
  of what is needed is autogenerated file see r600_states.h).
 
  I am also working on adding the same dump format to
  kernel to capture cs responsible of GPU lockup.
 
  Idea/comments are welcome
 
  Cheers,
  Jerome
 
  radeondb should show up at:
  http://cgit.freedesktop.org/~glisse/radeondb
 

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 11/13] drm/vmwgfx: don't initialize TTM io memory manager field

2010-04-07 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |6 --
 1 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 9d5a08d..25d332a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -137,9 +137,6 @@ int vmw_invalidate_caches(struct ttm_bo_device *bdev, 
uint32_t flags)
 int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
  struct ttm_mem_type_manager *man)
 {
-   struct vmw_private *dev_priv =
-   container_of(bdev, struct vmw_private, bdev);
-
switch (type) {
case TTM_PL_SYSTEM:
/* System memory */
@@ -151,10 +148,7 @@ int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t 
type,
case TTM_PL_VRAM:
/* On-card video ram */
man-gpu_offset = 0;
-   man-io_offset = dev_priv-vram_start;
-   man-io_size = dev_priv-vram_size;
man-flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
-   man-io_addr = NULL;
man-available_caching = TTM_PL_MASK_CACHING;
man-default_caching = TTM_PL_FLAG_WC;
break;
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 04/13] drm/vmwgfx: update to TTM no_wait splitted argument

2010-04-07 Thread Jerome Glisse
This patch updates vmwgfx to the new split no_wait argument
TTM functionality.

Compile tested only (but things should run, as there is no
operational change from the driver's point of view)
Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c |4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c  |4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c |2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 0897359..dbd36b8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -570,7 +570,7 @@ static int vmw_validate_single_buffer(struct vmw_private 
*dev_priv,
 * Put BO in VRAM, only if there is space.
 */
 
-   ret = ttm_bo_validate(bo, vmw_vram_sys_placement, true, false);
+   ret = ttm_bo_validate(bo, vmw_vram_sys_placement, true, false, false);
if (unlikely(ret == -ERESTARTSYS))
return ret;
 
@@ -590,7 +590,7 @@ static int vmw_validate_single_buffer(struct vmw_private 
*dev_priv,
 * previous contents.
 */
 
-   ret = ttm_bo_validate(bo, vmw_vram_placement, true, false);
+   ret = ttm_bo_validate(bo, vmw_vram_placement, true, false, false);
return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index a933670..80125ff 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -628,7 +628,7 @@ int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
if (unlikely(ret != 0))
return ret;
 
-   ret = ttm_bo_validate(bo, vmw_sys_placement, false, false);
+   ret = ttm_bo_validate(bo, vmw_sys_placement, false, false, false);
ttm_bo_unreserve(bo);
 
return ret;
@@ -652,7 +652,7 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private 
*vmw_priv,
if (unlikely(ret != 0))
goto err_unlock;
 
-   ret = ttm_bo_validate(bo, ne_placement, false, false);
+   ret = ttm_bo_validate(bo, ne_placement, false, false, false);
ttm_bo_unreserve(bo);
 err_unlock:
ttm_write_unlock(vmw_priv-active_master-lock);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 5b6eabe..ad566c8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -118,7 +118,7 @@ static int vmw_dmabuf_pin_in_vram(struct vmw_private 
*dev_priv,
if (pin)
overlay_placement = vmw_vram_ne_placement;
 
-   ret = ttm_bo_validate(bo, overlay_placement, interruptible, false);
+   ret = ttm_bo_validate(bo, overlay_placement, interruptible, false, 
false);
 
ttm_bo_unreserve(bo);
 
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 02/13] drm/radeon/kms: update to TTM no_wait splitted argument

2010-04-07 Thread Jerome Glisse
This patch updates radeon to the new split no_wait argument
TTM functionality.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_object.c |6 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c|   39 +--
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index fc9d00a..3adfa88 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -189,7 +189,7 @@ int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 
*gpu_addr)
bo-placement.lpfn = bo-rdev-mc.visible_vram_size  PAGE_SHIFT;
for (i = 0; i  bo-placement.num_placement; i++)
bo-placements[i] |= TTM_PL_FLAG_NO_EVICT;
-   r = ttm_bo_validate(bo-tbo, bo-placement, false, false);
+   r = ttm_bo_validate(bo-tbo, bo-placement, false, false, false);
if (likely(r == 0)) {
bo-pin_count = 1;
if (gpu_addr != NULL)
@@ -213,7 +213,7 @@ int radeon_bo_unpin(struct radeon_bo *bo)
return 0;
for (i = 0; i  bo-placement.num_placement; i++)
bo-placements[i] = ~TTM_PL_FLAG_NO_EVICT;
-   r = ttm_bo_validate(bo-tbo, bo-placement, false, false);
+   r = ttm_bo_validate(bo-tbo, bo-placement, false, false, false);
if (unlikely(r != 0))
dev_err(bo-rdev-dev, %p validate failed for unpin\n, bo);
return r;
@@ -328,7 +328,7 @@ int radeon_bo_list_validate(struct list_head *head)
lobj-rdomain);
}
r = ttm_bo_validate(bo-tbo, bo-placement,
-   true, false);
+   true, false, false);
if (unlikely(r))
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 43c5ab3..ba4724c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -243,9 +243,9 @@ static void radeon_move_null(struct ttm_buffer_object *bo,
 }
 
 static int radeon_move_blit(struct ttm_buffer_object *bo,
-   bool evict, int no_wait,
-   struct ttm_mem_reg *new_mem,
-   struct ttm_mem_reg *old_mem)
+   bool evict, int no_wait_reserve, bool no_wait_gpu,
+   struct ttm_mem_reg *new_mem,
+   struct ttm_mem_reg *old_mem)
 {
struct radeon_device *rdev;
uint64_t old_start, new_start;
@@ -289,13 +289,14 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
r = radeon_copy(rdev, old_start, new_start, new_mem-num_pages, fence);
/* FIXME: handle copy error */
r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
- evict, no_wait, new_mem);
+ evict, no_wait_reserve, no_wait_gpu, 
new_mem);
radeon_fence_unref(fence);
return r;
 }
 
 static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible, bool no_wait,
+   bool evict, bool interruptible,
+   bool no_wait_reserve, bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
 {
struct radeon_device *rdev;
@@ -316,7 +317,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
placement.busy_placement = placements;
placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, placement, tmp_mem,
-interruptible, no_wait);
+interruptible, no_wait_reserve, no_wait_gpu);
if (unlikely(r)) {
return r;
}
@@ -330,11 +331,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
if (unlikely(r)) {
goto out_cleanup;
}
-   r = radeon_move_blit(bo, true, no_wait, tmp_mem, old_mem);
+   r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, tmp_mem, 
old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
-   r = ttm_bo_move_ttm(bo, true, no_wait, new_mem);
+   r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
 out_cleanup:
if (tmp_mem.mm_node) {
struct ttm_bo_global *glob = rdev-mman.bdev.glob;
@@ -348,7 +349,8 @@ out_cleanup:
 }
 
 static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible, bool no_wait,
+   bool evict, bool interruptible,
+   bool no_wait_reserve, bool no_wait_gpu

unmappable vram V7

2010-04-07 Thread Jerome Glisse
So here is, I hope, the last spin of the patch series.
I dropped the needs-ioremap flag; the callback is now
responsible for ioremapping memory and providing a valid
virtual address, which simplifies the TTM code. I haven't
changed the enum values after removing the dead flag —
Thomas, maybe you would prefer to change them?

Again, only tested on radeon; just compile tested for vmwgfx
and nouveau.

Cheers,
Jerome


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 06/13] drm/radeon/kms: add support for new fault callback V6

2010-04-07 Thread Jerome Glisse
This adds support for the new fault callback and also the
infrastructure for supporting unmappable VRAM.

V2 validate BO with no_wait = true
V3 don't derefence bo-mem.mm_node as it's not NULL only for
   VRAM or GTT
V4 update to splitted no_wait ttm change
V5 update to new balanced io_mem_reserve/free change
V6 callback is responsible for iomapping memory

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_object.c |   26 +-
 drivers/gpu/drm/radeon/radeon_object.h |2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c|   59 +--
 3 files changed, 80 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index 3adfa88..aef44f3 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -496,11 +496,33 @@ void radeon_bo_move_notify(struct ttm_buffer_object *bo,
radeon_bo_check_tiling(rbo, 0, 1);
 }
 
-void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
+   struct radeon_device *rdev;
struct radeon_bo *rbo;
+   unsigned long offset, size;
+   int r;
+
if (!radeon_ttm_bo_is_radeon_bo(bo))
-   return;
+   return 0;
rbo = container_of(bo, struct radeon_bo, tbo);
radeon_bo_check_tiling(rbo, 0, 0);
+   rdev = rbo-rdev;
+   if (bo-mem.mem_type == TTM_PL_VRAM) {
+   size = bo-mem.num_pages  PAGE_SHIFT;
+   offset = bo-mem.mm_node-start  PAGE_SHIFT;
+   if ((offset + size)  rdev-mc.visible_vram_size) {
+   /* hurrah the memory is not visible ! */
+   radeon_ttm_placement_from_domain(rbo, 
RADEON_GEM_DOMAIN_VRAM);
+   rbo-placement.lpfn = rdev-mc.visible_vram_size  
PAGE_SHIFT;
+   r = ttm_bo_validate(bo, rbo-placement, false, true, 
false);
+   if (unlikely(r != 0))
+   return r;
+   offset = bo-mem.mm_node-start  PAGE_SHIFT;
+   /* this should not happen */
+   if ((offset + size)  rdev-mc.visible_vram_size)
+   return -EINVAL;
+   }
+   }
+   return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_object.h 
b/drivers/gpu/drm/radeon/radeon_object.h
index 7ab43de..353998d 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -168,6 +168,6 @@ extern int radeon_bo_check_tiling(struct radeon_bo *bo, 
bool has_moved,
bool force_drop);
 extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem);
-extern void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
+extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index ba4724c..1fc6cad 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -165,8 +165,7 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
man-io_size = rdev-mc.gtt_size;
man-io_addr = NULL;
if (!rdev-ddev-agp-cant_use_aperture)
-   man-flags = TTM_MEMTYPE_FLAG_NEEDS_IOREMAP |
-TTM_MEMTYPE_FLAG_MAPPABLE;
+   man-flags = TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
@@ -182,7 +181,6 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
/* On-card video ram */
man-gpu_offset = rdev-mc.vram_start;
man-flags = TTM_MEMTYPE_FLAG_FIXED |
-TTM_MEMTYPE_FLAG_NEEDS_IOREMAP |
 TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
@@ -437,10 +435,61 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 memcpy:
r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, 
new_mem);
}
-
return r;
 }
 
+static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct radeon_device *rdev = radeon_get_rdev(bdev);
+
+   mem-bus.addr = NULL;
+   mem-bus.offset = 0;
+   mem-bus.size = mem

[PATCH 12/13] drm/ttm: remove io_ field from TTM V5

2010-04-07 Thread Jerome Glisse
All TTM drivers have been converted to the new io_mem_reserve/free
interface, which allows a driver to choose and return the proper io
base and offset to core TTM for ioremapping if necessary. This
patch removes what is now dead code.

V2 adapt to match with change in first patch of the patchset
V3 update after io_mem_reserve/io_mem_free callback balancing
V4 adjust to minor cleanup
V5 remove the needs ioremap flag

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |   22 --
 drivers/gpu/drm/ttm/ttm_bo_util.c |   80 ++---
 include/drm/ttm/ttm_bo_api.h  |4 +-
 include/drm/ttm/ttm_bo_driver.h   |   12 --
 4 files changed, 13 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2171f86..1f27cf2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -79,8 +79,6 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, 
int mem_type)
printk(KERN_ERR TTM_PFX use_type: %d\n, man-use_type);
printk(KERN_ERR TTM_PFX flags: 0x%08X\n, man-flags);
printk(KERN_ERR TTM_PFX gpu_offset: 0x%08lX\n, man-gpu_offset);
-   printk(KERN_ERR TTM_PFX io_offset: 0x%08lX\n, man-io_offset);
-   printk(KERN_ERR TTM_PFX io_size: %ld\n, man-io_size);
printk(KERN_ERR TTM_PFX size: %llu\n, man-size);
printk(KERN_ERR TTM_PFX available_caching: 0x%08X\n,
man-available_caching);
@@ -1563,26 +1561,6 @@ bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, 
struct ttm_mem_reg *mem)
return true;
 }
 
-int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
- struct ttm_mem_reg *mem,
- unsigned long *bus_base,
- unsigned long *bus_offset, unsigned long *bus_size)
-{
-   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
-
-   *bus_size = 0;
-   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
-   return -EINVAL;
-
-   if (ttm_mem_reg_is_pci(bdev, mem)) {
-   *bus_offset = mem-mm_node-start  PAGE_SHIFT;
-   *bus_size = mem-num_pages  PAGE_SHIFT;
-   *bus_base = man-io_offset + (uintptr_t)man-io_addr;
-   }
-
-   return 0;
-}
-
 void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 {
struct ttm_bo_device *bdev = bo-bdev;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 8493f39..15d1bf3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -83,61 +83,33 @@ EXPORT_SYMBOL(ttm_bo_move_ttm);
 
 int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
-   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
-   int ret;
+   int ret = 0;
 
-   if (bdev-driver-io_mem_reserve) {
-   if (!mem-bus.io_reserved) {
-   mem-bus.io_reserved = true;
-   ret = bdev-driver-io_mem_reserve(bdev, mem);
-   if (unlikely(ret != 0))
-   return ret;
-   }
-   } else {
-   ret = ttm_bo_pci_offset(bdev, mem, mem-bus.base, 
mem-bus.offset, mem-bus.size);
-   if (unlikely(ret != 0))
-   return ret;
-   mem-bus.addr = man-io_addr;
-   mem-bus.is_iomem = (mem-bus.size  0) ? 1 : 0;
+   if (!mem-bus.io_reserved) {
+   mem-bus.io_reserved = true;
+   ret = bdev-driver-io_mem_reserve(bdev, mem);
}
-   return 0;
+   return ret;
 }
 
 void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
-   if (bdev-driver-io_mem_reserve) {
-   if (mem-bus.io_reserved) {
-   mem-bus.io_reserved = false;
-   bdev-driver-io_mem_free(bdev, mem);
-   }
+   if (mem-bus.io_reserved) {
+   mem-bus.io_reserved = false;
+   bdev-driver-io_mem_free(bdev, mem);
}
 }
 
 int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
void **virtual)
 {
-   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
int ret;
-   void *addr;
 
*virtual = NULL;
ret = ttm_mem_io_reserve(bdev, mem);
if (ret)
return ret;
-
-   if (!(man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP)) {
-   addr = mem-bus.addr;
-   } else {
-   if (mem-placement  TTM_PL_FLAG_WC)
-   addr = ioremap_wc(mem-bus.base + mem-bus.offset, 
mem-bus.size);
-   else
-   addr = ioremap_nocache(mem-bus.base + mem-bus.offset, 
mem-bus.size);
-   if (!addr) {
-   ttm_mem_io_free(bdev, mem);
-   return -ENOMEM;
-   }
-   }
-   *virtual = addr;
+   *virtual

[PATCH 10/13] drm/nouveau/kms: don't initialize TTM io memory manager field

2010-04-07 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |   11 ---
 1 files changed, 0 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index efc124d..5ee48ec 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -390,13 +390,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-
-   man-io_addr = NULL;
-   man-io_offset = drm_get_resource_start(dev, 1);
-   man-io_size = drm_get_resource_len(dev, 1);
-   if (man-io_size  nouveau_mem_fb_amount(dev))
-   man-io_size = nouveau_mem_fb_amount(dev);
-
man-gpu_offset = dev_priv-vm_vram_base;
break;
case TTM_PL_TT:
@@ -417,10 +410,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
 dev_priv-gart_info.type);
return -EINVAL;
}
-
-   man-io_offset  = dev_priv-gart_info.aper_base;
-   man-io_size= dev_priv-gart_info.aper_size;
-   man-io_addr   = NULL;
man-gpu_offset = dev_priv-vm_gart_base;
break;
default:
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 03/13] drm/nouveau: update to TTM no_wait splitted argument

2010-04-07 Thread Jerome Glisse
This patch updates nouveau to the new split no_wait argument
TTM functionality.

Compile tested only (but things should run, as there is no
operational change from the driver's point of view)

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  |   45 ++--
 drivers/gpu/drm/nouveau/nouveau_gem.c |2 +-
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0266124..5a167de 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -219,7 +219,7 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype)
for (i = 0; i  nvbo-placement.num_placement; i++)
nvbo-placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-   ret = ttm_bo_validate(bo, nvbo-placement, false, false);
+   ret = ttm_bo_validate(bo, nvbo-placement, false, false, false);
if (ret == 0) {
switch (bo-mem.mem_type) {
case TTM_PL_VRAM:
@@ -256,7 +256,7 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo)
for (i = 0; i  nvbo-placement.num_placement; i++)
nvbo-placements[i] = ~TTM_PL_FLAG_NO_EVICT;
 
-   ret = ttm_bo_validate(bo, nvbo-placement, false, false);
+   ret = ttm_bo_validate(bo, nvbo-placement, false, false, false);
if (ret == 0) {
switch (bo-mem.mem_type) {
case TTM_PL_VRAM:
@@ -456,7 +456,8 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct 
ttm_placement *pl)
 
 static int
 nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
- struct nouveau_bo *nvbo, bool evict, bool no_wait,
+ struct nouveau_bo *nvbo, bool evict,
+ bool no_wait_reserve, bool no_wait_gpu,
  struct ttm_mem_reg *new_mem)
 {
struct nouveau_fence *fence = NULL;
@@ -467,7 +468,7 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
return ret;
 
ret = ttm_bo_move_accel_cleanup(nvbo-bo, fence, NULL,
-   evict, no_wait, new_mem);
+   evict, no_wait_reserve, no_wait_gpu, 
new_mem);
if (nvbo-channel  nvbo-channel != chan)
ret = nouveau_fence_wait(fence, NULL, false, false);
nouveau_fence_unref((void *)fence);
@@ -491,7 +492,8 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct 
nouveau_channel *chan,
 
 static int
 nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-int no_wait, struct ttm_mem_reg *new_mem)
+bool no_wait_reserve, bool no_wait_gpu,
+struct ttm_mem_reg *new_mem)
 {
struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo-bdev);
@@ -569,12 +571,13 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int 
evict, bool intr,
dst_offset += (PAGE_SIZE * line_count);
}
 
-   return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, no_wait, 
new_mem);
+   return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, 
no_wait_reserve, no_wait_gpu, new_mem);
 }
 
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
 {
u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
struct ttm_placement placement;
@@ -587,7 +590,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
 
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
-   ret = ttm_bo_mem_space(bo, placement, tmp_mem, intr, no_wait);
+   ret = ttm_bo_mem_space(bo, placement, tmp_mem, intr, no_wait_reserve, 
no_wait_gpu);
if (ret)
return ret;
 
@@ -595,11 +598,11 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
if (ret)
goto out;
 
-   ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, tmp_mem);
+   ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, 
no_wait_gpu, tmp_mem);
if (ret)
goto out;
 
-   ret = ttm_bo_move_ttm(bo, evict, no_wait, new_mem);
+   ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
 out:
if (tmp_mem.mm_node) {
spin_lock(bo-bdev-glob-lru_lock);
@@ -612,7 +615,8 @@ out:
 
 static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
 {
u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING

[PATCH 01/13] drm/ttm: split no_wait argument in 2 GPU or reserve wait

2010-04-07 Thread Jerome Glisse
There are cases where we want to be able to wait only for the
GPU while not waiting for other buffers to be unreserved. This
patch splits the no_wait argument all the way down the whole
TTM path so that upper levels can decide what to wait on or
not.

This patch breaks the API to other modules; updates to the other
drivers follow in separate patches.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |   57 
 drivers/gpu/drm/ttm/ttm_bo_util.c |9 --
 include/drm/ttm/ttm_bo_api.h  |6 ++-
 include/drm/ttm/ttm_bo_driver.h   |   29 +++---
 4 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9db02bb..6f51b30 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -357,7 +357,8 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
 
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
  struct ttm_mem_reg *mem,
- bool evict, bool interruptible, bool no_wait)
+ bool evict, bool interruptible,
+ bool no_wait_reserve, bool no_wait_gpu)
 {
struct ttm_bo_device *bdev = bo-bdev;
bool old_is_pci = ttm_mem_reg_is_pci(bdev, bo-mem);
@@ -402,12 +403,12 @@ static int ttm_bo_handle_move_mem(struct 
ttm_buffer_object *bo,
 
if (!(old_man-flags  TTM_MEMTYPE_FLAG_FIXED) 
!(new_man-flags  TTM_MEMTYPE_FLAG_FIXED))
-   ret = ttm_bo_move_ttm(bo, evict, no_wait, mem);
+   ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, 
mem);
else if (bdev-driver-move)
ret = bdev-driver-move(bo, evict, interruptible,
-no_wait, mem);
+no_wait_reserve, no_wait_gpu, mem);
else
-   ret = ttm_bo_move_memcpy(bo, evict, no_wait, mem);
+   ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, 
no_wait_gpu, mem);
 
if (ret)
goto out_err;
@@ -606,7 +607,7 @@ void ttm_bo_unref(struct ttm_buffer_object **p_bo)
 EXPORT_SYMBOL(ttm_bo_unref);
 
 static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
-   bool no_wait)
+   bool no_wait_reserve, bool no_wait_gpu)
 {
struct ttm_bo_device *bdev = bo-bdev;
struct ttm_bo_global *glob = bo-glob;
@@ -615,7 +616,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
int ret = 0;
 
spin_lock(bo-lock);
-   ret = ttm_bo_wait(bo, false, interruptible, no_wait);
+   ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
spin_unlock(bo-lock);
 
if (unlikely(ret != 0)) {
@@ -638,7 +639,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
placement.num_busy_placement = 0;
bdev-driver-evict_flags(bo, placement);
ret = ttm_bo_mem_space(bo, placement, evict_mem, interruptible,
-   no_wait);
+   no_wait_reserve, no_wait_gpu);
if (ret) {
if (ret != -ERESTARTSYS) {
printk(KERN_ERR TTM_PFX
@@ -650,7 +651,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
}
 
ret = ttm_bo_handle_move_mem(bo, evict_mem, true, interruptible,
-no_wait);
+no_wait_reserve, no_wait_gpu);
if (ret) {
if (ret != -ERESTARTSYS)
printk(KERN_ERR TTM_PFX Buffer eviction failed\n);
@@ -670,7 +671,8 @@ out:
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
uint32_t mem_type,
-   bool interruptible, bool no_wait)
+   bool interruptible, bool no_wait_reserve,
+   bool no_wait_gpu)
 {
struct ttm_bo_global *glob = bdev-glob;
struct ttm_mem_type_manager *man = bdev-man[mem_type];
@@ -687,11 +689,11 @@ retry:
bo = list_first_entry(man-lru, struct ttm_buffer_object, lru);
kref_get(bo-list_kref);
 
-   ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+   ret = ttm_bo_reserve_locked(bo, false, no_wait_reserve, false, 0);
 
if (unlikely(ret == -EBUSY)) {
spin_unlock(glob-lru_lock);
-   if (likely(!no_wait))
+   if (likely(!no_wait_gpu))
ret = ttm_bo_wait_unreserved(bo, interruptible);
 
kref_put(bo-list_kref, ttm_bo_release_list);
@@ -713,7 +715,7 @@ retry:
while (put_count--)
kref_put(bo-list_kref, ttm_bo_ref_bug);
 
-   ret = ttm_bo_evict(bo

[PATCH 07/13] drm/nouveau/kms: add support for new TTM fault callback V4

2010-04-07 Thread Jerome Glisse
This adds support for the new fault callback; it doesn't change
anything from the driver's point of view, though it should allow
nouveau to add support for unmappable VRAM.

Improvement: store the aperture base in a variable so that we don't
call a function to get it on each fault.

Patch hasn't been tested on any hw.

V2 don't derefence bo-mem.mm_node as it's not NULL only for
   VRAM or GTT
V3 update after io_mem_reserve/io_mem_free callback balancing
V4 callback has to ioremap

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |   66 +++--
 1 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 5a167de..efc124d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -386,8 +386,7 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
break;
case TTM_PL_VRAM:
man-flags = TTM_MEMTYPE_FLAG_FIXED |
-TTM_MEMTYPE_FLAG_MAPPABLE |
-TTM_MEMTYPE_FLAG_NEEDS_IOREMAP;
+TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
@@ -403,8 +402,7 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
case TTM_PL_TT:
switch (dev_priv-gart_info.type) {
case NOUVEAU_GART_AGP:
-   man-flags = TTM_MEMTYPE_FLAG_MAPPABLE |
-TTM_MEMTYPE_FLAG_NEEDS_IOREMAP;
+   man-flags = TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED;
man-default_caching = TTM_PL_FLAG_UNCACHED;
break;
@@ -761,6 +759,63 @@ nouveau_bo_verify_access(struct ttm_buffer_object *bo, 
struct file *filp)
return 0;
 }
 
+static int
+nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
+   struct drm_device *dev = dev_priv-dev;
+
+   mem-bus.addr = NULL;
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   mem-bus.is_iomem = false;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* System memory */
+   return 0;
+   case TTM_PL_TT:
+#if __OS_HAS_AGP
+   if (dev_priv-gart_info.type == NOUVEAU_GART_AGP) {
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = dev_priv-gart_info.aper_base;
+   mem-bus.is_iomem = true;
+   mem-bus.addr = ioremap_wc(mem-bus.base + 
mem-bus.offset, mem-bus.size);
+   if (mem-bus.addr == NULL)
+   return -ENOMEM;
+   }
+#endif
+   break;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = drm_get_resource_start(dev, 1);
+   mem-bus.is_iomem = true;
+   mem-bus.addr = ioremap_wc(mem-bus.base + mem-bus.offset, 
mem-bus.size);
+   if (mem-bus.addr == NULL)
+   return -ENOMEM;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void
+nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   if (mem-bus.addr)
+   iounmap(mem-bus.addr);
+}
+
+static int
+nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+   return 0;
+}
+
 struct ttm_bo_driver nouveau_bo_driver = {
.create_ttm_backend_entry = nouveau_bo_create_ttm_backend_entry,
.invalidate_caches = nouveau_bo_invalidate_caches,
@@ -773,5 +828,8 @@ struct ttm_bo_driver nouveau_bo_driver = {
.sync_obj_flush = nouveau_fence_flush,
.sync_obj_unref = nouveau_fence_unref,
.sync_obj_ref = nouveau_fence_ref,
+   .fault_reserve_notify = nouveau_ttm_fault_reserve_notify,
+   .io_mem_reserve = nouveau_ttm_io_mem_reserve,
+   .io_mem_free = nouveau_ttm_io_mem_free,
 };
 
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel

[PATCH 08/13] drm/vmwgfx: add support for new TTM fault callback V4

2010-04-07 Thread Jerome Glisse
This adds support for the new fault callback; it doesn't change
anything from the driver's point of view.

Improvement: store the aperture base in a variable so that we don't
call a function to get it on each fault.

Patch hasn't been tested.

V2 don't derefence bo-mem.mm_node as it's not NULL only for
   VRAM or GTT
V3 update after io_mem_reserve/io_mem_free callback balancing
V4 callback has to ioremap

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |   49 ++--
 1 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 825ebe3..9d5a08d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -153,8 +153,7 @@ int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t 
type,
man-gpu_offset = 0;
man-io_offset = dev_priv-vram_start;
man-io_size = dev_priv-vram_size;
-   man-flags = TTM_MEMTYPE_FLAG_FIXED |
-   TTM_MEMTYPE_FLAG_NEEDS_IOREMAP | TTM_MEMTYPE_FLAG_MAPPABLE;
+   man-flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
man-io_addr = NULL;
man-available_caching = TTM_PL_MASK_CACHING;
man-default_caching = TTM_PL_FLAG_WC;
@@ -193,6 +192,47 @@ static void vmw_swap_notify(struct ttm_buffer_object *bo)
vmw_dmabuf_gmr_unbind(bo);
 }
 
+static int vmw_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct vmw_private *dev_priv = container_of(bdev, struct vmw_private, 
bdev);
+
+   mem-bus.addr = NULL;
+   mem-bus.is_iomem = false;
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* System memory */
+   return 0;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = dev_priv-vram_start;
+   mem-bus.is_iomem = true;
+   mem-bus.addr = ioremap_wc(mem-bus.base + mem-bus.offset, 
mem-bus.size);
+   if (mem-bus.addr == NULL)
+   return -ENOMEM;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void vmw_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg 
*mem)
+{
+   if (mem-bus.addr)
+   iounmap(mem-bus.addr);
+}
+
+static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+   return 0;
+}
+
 /**
  * FIXME: We're using the old vmware polling method to sync.
  * Do this with fences instead.
@@ -248,5 +288,8 @@ struct ttm_bo_driver vmw_bo_driver = {
.sync_obj_unref = vmw_sync_obj_unref,
.sync_obj_ref = vmw_sync_obj_ref,
.move_notify = vmw_move_notify,
-   .swap_notify = vmw_swap_notify
+   .swap_notify = vmw_swap_notify,
+   .fault_reserve_notify = vmw_ttm_fault_reserve_notify,
+   .io_mem_reserve = vmw_ttm_io_mem_reserve,
+   .io_mem_free = vmw_ttm_io_mem_free,
 };
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 09/13] drm/radeon/kms: don't initialize TTM io memory manager field

2010-04-07 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_ttm.c |   13 +
 1 files changed, 1 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 1fc6cad..14da488 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -161,21 +161,13 @@ static int radeon_init_mem_type(struct ttm_bo_device 
*bdev, uint32_t type,
  (unsigned)type);
return -EINVAL;
}
-   man-io_offset = rdev-mc.agp_base;
-   man-io_size = rdev-mc.gtt_size;
-   man-io_addr = NULL;
if (!rdev-ddev-agp-cant_use_aperture)
man-flags = TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-   } else
-#endif
-   {
-   man-io_offset = 0;
-   man-io_size = 0;
-   man-io_addr = NULL;
}
+#endif
break;
case TTM_PL_VRAM:
/* On-card video ram */
@@ -184,9 +176,6 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
 TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-   man-io_addr = NULL;
-   man-io_offset = rdev-mc.aper_base;
-   man-io_size = rdev-mc.aper_size;
break;
default:
DRM_ERROR(Unsupported memory type %u\n, (unsigned)type);
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 13/13] drm/radeon/kms: enable use of unmappable VRAM V2

2010-04-07 Thread Jerome Glisse
This patch enables the use of unmappable VRAM thanks to the
previous TTM infrastructure changes.

V2 update after io_mem_reserve/io_mem_free callback balancing

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/evergreen.c |5 -
 drivers/gpu/drm/radeon/r100.c  |5 -
 drivers/gpu/drm/radeon/r600.c  |5 -
 drivers/gpu/drm/radeon/rv770.c |5 -
 4 files changed, 0 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 7672f11..48e0dfb 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -475,11 +475,6 @@ int evergreen_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
radeon_update_bandwidth_info(rdev);
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7a1180d..fc95907 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2026,11 +2026,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
else
rdev-mc.mc_vram_size = rdev-mc.real_vram_size;
}
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
 }
 
 void r100_vga_set_state(struct radeon_device *rdev, bool state)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index a948947..4206c8d 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -713,11 +713,6 @@ int r600_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
 
if (rdev-flags  RADEON_IS_IGP)
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 2b8a4e1..599121b 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -905,11 +905,6 @@ int rv770_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
radeon_update_bandwidth_info(rdev);
 
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: unmappable vram V6

2010-04-05 Thread Jerome Glisse
On Mon, Apr 05, 2010 at 02:23:58PM +0200, Thomas Hellstrom wrote:
 Jerome Glisse wrote:
  So in these patchset i use bool instead of atomic remove empty line
  removal, and i hope addressed standing issues. Again only compile
  tested for nouveau  vmwgfx. Tested this time only tested on RV710
  with special patch to force unmappable vram use.
  http://people.freedesktop.org/~glisse/0014-TEST-UNMAPPABLE.patch
 
  Seems to work flawlessly (quake3, compiz, glxgears, firefox running
  side by side)
 
  Cheers,
  Jerome
 

 
 Jerome, Dave.
 
 Apart from a small fix in patch 12/13, I'm fine with this going in. 
 Unfortunately I haven't had time to look at the Nouveau / Radeon 
 patches, but the ttm- and vmwgfx patches look fine.
 
 Thanks,
 Thomas

I will do a last spin of the patch with removal of the
need-ioremap stuff, as you said this can now be
handled in the driver and so TTM doesn't have to
worry about it anymore.

Thanks a lot for reviewing all this :)

Cheers,
Jerome

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 1/7] drm/ttm: add pool wc/uc page allocator V2

2010-04-01 Thread Jerome Glisse
On Sun, Mar 28, 2010 at 09:16:01PM +0300, Pauli Nieminen wrote:
 On AGP system we might allocate/free routinely uncached or wc memory,
 changing page from cached (wb) to uc or wc is very expensive and involves
 a lot of flushing. To improve performance this allocator use a pool
 of uc,wc pages.
 
 Pools are protected with spinlocks to allow multiple threads to allocate pages
 simultanously. Expensive operations are done outside of spinlock to maximize
 concurrency.
 
 Pools are linked lists of pages that were recently freed. mm shrink callback
 allows kernel to claim back pages when they are required for something else.
 
 Fixes:
 * set_pages_array_wb handles highmem pages so we don't have to remove them
   from pool.
 * Add count parameter to ttm_put_pages to avoid looping in free code.
 * Change looping from _safe to normal in pool fill error path.
 * Initialize sum variable and make the loop prettier in get_num_unused_pages
 
 Based on Jerome Glisse's and Dave Airlie's pool allocator.
 
 Signed-off-by: Jerome Glisse jgli...@redhat.com
 Signed-off-by: Dave Airlie airl...@redhat.com
 Signed-off-by: Pauli Nieminen suok...@gmail.com

I think there is only one issue left see below, once yo got that one
fixed you got my ACK

Cheers,
Jerome

 ---
  drivers/gpu/drm/ttm/Makefile |2 +-
  drivers/gpu/drm/ttm/ttm_memory.c |7 +-
  drivers/gpu/drm/ttm/ttm_page_alloc.c |  705 
 ++
  drivers/gpu/drm/ttm/ttm_tt.c |   44 +--
  include/drm/ttm/ttm_page_alloc.h |   70 
  5 files changed, 803 insertions(+), 25 deletions(-)
  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
  create mode 100644 include/drm/ttm/ttm_page_alloc.h
 
 diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
 index 1e138f5..4256e20 100644
 --- a/drivers/gpu/drm/ttm/Makefile
 +++ b/drivers/gpu/drm/ttm/Makefile
 @@ -4,6 +4,6 @@
  ccflags-y := -Iinclude/drm
  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
   ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
 - ttm_object.o ttm_lock.o ttm_execbuf_util.o
 + ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
  
  obj-$(CONFIG_DRM_TTM) += ttm.o
 diff --git a/drivers/gpu/drm/ttm/ttm_memory.c 
 b/drivers/gpu/drm/ttm/ttm_memory.c
 index eb143e0..72f31aa 100644
 --- a/drivers/gpu/drm/ttm/ttm_memory.c
 +++ b/drivers/gpu/drm/ttm/ttm_memory.c
 @@ -27,6 +27,7 @@
  
  #include ttm/ttm_memory.h
  #include ttm/ttm_module.h
 +#include ttm/ttm_page_alloc.h
  #include linux/spinlock.h
  #include linux/sched.h
  #include linux/wait.h
 @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
  Zone %7s: Available graphics memory: %llu kiB.\n,
  zone-name, (unsigned long long) zone-max_mem  10);
   }
 + ttm_page_alloc_init(glob-zone_kernel-max_mem/(2*PAGE_SIZE));
   return 0;
  out_no_zone:
   ttm_mem_global_release(glob);
 @@ -406,6 +408,9 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
   unsigned int i;
   struct ttm_mem_zone *zone;
  
 + /* let the page allocator first stop the shrink work. */
 + ttm_page_alloc_fini();
 +
   flush_workqueue(glob-swap_queue);
   destroy_workqueue(glob-swap_queue);
   glob-swap_queue = NULL;
 @@ -413,7 +418,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
   zone = glob-zones[i];
   kobject_del(zone-kobj);
   kobject_put(zone-kobj);
 - }
 + }
   kobject_del(glob-kobj);
   kobject_put(glob-kobj);
  }
 diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
 b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 new file mode 100644
 index 000..5029fd0
 --- /dev/null
 +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 @@ -0,0 +1,705 @@
 +/*
 + * Copyright (c) Red Hat Inc.
 +
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the Software),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sub license,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the
 + * next paragraph) shall be included in all copies or substantial portions
 + * of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 + * DEALINGS IN THE SOFTWARE

Re: [PATCH 1/7] drm/ttm: add pool wc/uc page allocator V3

2010-04-01 Thread Jerome Glisse
On Thu, Apr 01, 2010 at 03:44:57PM +0300, Pauli Nieminen wrote:
 On AGP system we might allocate/free routinely uncached or wc memory,
 changing page from cached (wb) to uc or wc is very expensive and involves
 a lot of flushing. To improve performance this allocator use a pool
 of uc,wc pages.
 
 Pools are protected with spinlocks to allow multiple threads to allocate pages
 simultanously. Expensive operations are done outside of spinlock to maximize
 concurrency.
 
 Pools are linked lists of pages that were recently freed. mm shrink callback
 allows kernel to claim back pages when they are required for something else.
 
 Fixes:
 * set_pages_array_wb handles highmem pages so we don't have to remove them
   from pool.
 * Add count parameter to ttm_put_pages to avoid looping in free code.
 * Change looping from _safe to normal in pool fill error path.
 * Initialize sum variable and make the loop prettier in get_num_unused_pages.
 
 * Moved pages_freed reseting inside the loop in ttm_page_pool_free.
 * Add warning comment about spinlock context in ttm_page_pool_free.
 
 Based on Jerome Glisse's and Dave Airlie's pool allocator.
 
 Signed-off-by: Jerome Glisse jgli...@redhat.com
 Signed-off-by: Dave Airlie airl...@redhat.com
 Signed-off-by: Pauli Nieminen suok...@gmail.com
Reviewed-by: Jerome Glisse jgli...@redhat.com
 ---
  drivers/gpu/drm/ttm/Makefile |2 +-
  drivers/gpu/drm/ttm/ttm_memory.c |7 +-
  drivers/gpu/drm/ttm/ttm_page_alloc.c |  711 
 ++
  drivers/gpu/drm/ttm/ttm_tt.c |   44 +--
  include/drm/ttm/ttm_page_alloc.h |   70 
  5 files changed, 809 insertions(+), 25 deletions(-)
  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
  create mode 100644 include/drm/ttm/ttm_page_alloc.h
 
 diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
 index 1e138f5..4256e20 100644
 --- a/drivers/gpu/drm/ttm/Makefile
 +++ b/drivers/gpu/drm/ttm/Makefile
 @@ -4,6 +4,6 @@
  ccflags-y := -Iinclude/drm
  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
   ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
 - ttm_object.o ttm_lock.o ttm_execbuf_util.o
 + ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
  
  obj-$(CONFIG_DRM_TTM) += ttm.o
 diff --git a/drivers/gpu/drm/ttm/ttm_memory.c 
 b/drivers/gpu/drm/ttm/ttm_memory.c
 index eb143e0..72f31aa 100644
 --- a/drivers/gpu/drm/ttm/ttm_memory.c
 +++ b/drivers/gpu/drm/ttm/ttm_memory.c
 @@ -27,6 +27,7 @@
  
  #include ttm/ttm_memory.h
  #include ttm/ttm_module.h
 +#include ttm/ttm_page_alloc.h
  #include linux/spinlock.h
  #include linux/sched.h
  #include linux/wait.h
 @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
  Zone %7s: Available graphics memory: %llu kiB.\n,
  zone-name, (unsigned long long) zone-max_mem  10);
   }
 + ttm_page_alloc_init(glob-zone_kernel-max_mem/(2*PAGE_SIZE));
   return 0;
  out_no_zone:
   ttm_mem_global_release(glob);
 @@ -406,6 +408,9 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
   unsigned int i;
   struct ttm_mem_zone *zone;
  
 + /* let the page allocator first stop the shrink work. */
 + ttm_page_alloc_fini();
 +
   flush_workqueue(glob-swap_queue);
   destroy_workqueue(glob-swap_queue);
   glob-swap_queue = NULL;
 @@ -413,7 +418,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
   zone = glob-zones[i];
   kobject_del(zone-kobj);
   kobject_put(zone-kobj);
 - }
 + }
   kobject_del(glob-kobj);
   kobject_put(glob-kobj);
  }
 diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
 b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 new file mode 100644
 index 000..f46e40b
 --- /dev/null
 +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 @@ -0,0 +1,711 @@
 +/*
 + * Copyright (c) Red Hat Inc.
 +
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the Software),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sub license,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the
 + * next paragraph) shall be included in all copies or substantial portions
 + * of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT

AGP force use of scratch page

2010-04-01 Thread Jerome Glisse
Hi,

Attached is a patch which force the use of the scratch page
for all AGP bridges, only tested on SIS or Intel bridges,
i am bit skeptical for alpha bridges. If you prefer i can
just force this for x86  PPC bridge for which we can easily
get tester.

Cheers,
Jerome
From 5b2469a7149280fb6866345cb6d3d395ee400cd8 Mon Sep 17 00:00:00 2001
From: Jerome Glisse jgli...@redhat.com
Date: Thu, 1 Apr 2010 16:46:15 +0200
Subject: [PATCH] agp: force use of scratch page

It seems that some AGP hardware can induce activity on the bus
after reporting being idle with the memory (prefetching maybe).
As the new infrastructure for GPUs dynamically binds/unbinds
memory, this results in the hardware possibly trying to access
invalid memory. To avoid this situation, this patch forces the use
of the scratch page mechanism for all the AGP drivers. It removes
the needs_scratch_page flag and fixes insert_memory/remove_memory
to deal properly with the scratch page.

Tested on Intel and SiS AGP bridges.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/char/agp/agp.h  |1 -
 drivers/char/agp/backend.c  |   57 +-
 drivers/char/agp/hp-agp.c   |2 +-
 drivers/char/agp/i460-agp.c |2 +-
 drivers/char/agp/intel-agp.c|   30 +++-
 drivers/char/agp/parisc-agp.c   |2 +-
 drivers/char/agp/sgi-agp.c  |5 +--
 drivers/char/agp/uninorth-agp.c |3 +-
 8 files changed, 42 insertions(+), 60 deletions(-)

diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 870f12c..db68c7b 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -100,7 +100,6 @@ struct agp_bridge_driver {
int num_aperture_sizes;
enum aper_size_type size_type;
bool cant_use_aperture;
-   bool needs_scratch_page;
const struct gatt_mask *masks;
int (*fetch_size)(void);
int (*configure)(void);
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index c3ab46d..fed5e46 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -136,35 +136,32 @@ static int agp_find_max(void)
 static int agp_backend_initialize(struct agp_bridge_data *bridge)
 {
int size_value, rc, got_gatt=0, got_keylist=0;
+   struct page *page;
+   void *va;
 
bridge-max_memory_agp = agp_find_max();
bridge-version = agp_current_version;
 
-   if (bridge-driver-needs_scratch_page) {
-   struct page *page = bridge-driver-agp_alloc_page(bridge);
-
-   if (!page) {
-   dev_err(bridge-dev-dev,
+   page = bridge-driver-agp_alloc_page(bridge);
+   if (!page) {
+   dev_err(bridge-dev-dev,
can't get memory for scratch page\n);
-   return -ENOMEM;
-   }
-
-   bridge-scratch_page_page = page;
-   if (bridge-driver-agp_map_page) {
-   if (bridge-driver-agp_map_page(page,
-
bridge-scratch_page_dma)) {
-   dev_err(bridge-dev-dev,
+   return -ENOMEM;
+   }
+   bridge-scratch_page_page = page;
+   if (bridge-driver-agp_map_page) {
+   if (bridge-driver-agp_map_page(page,
+   bridge-scratch_page_dma)) {
+   dev_err(bridge-dev-dev,
unable to dma-map scratch page\n);
-   rc = -ENOMEM;
-   goto err_out_nounmap;
-   }
-   } else {
-   bridge-scratch_page_dma = page_to_phys(page);
+   rc = -ENOMEM;
+   goto err_out_nounmap;
}
-
-   bridge-scratch_page = bridge-driver-mask_memory(bridge,
-  bridge-scratch_page_dma, 0);
+   } else {
+   bridge-scratch_page_dma = page_to_phys(page);
}
+   bridge-scratch_page = bridge-driver-mask_memory(bridge,
+   bridge-scratch_page_dma, 0);
 
size_value = bridge-driver-fetch_size();
if (size_value == 0) {
@@ -203,18 +200,15 @@ static int agp_backend_initialize(struct agp_bridge_data 
*bridge)
return 0;
 
 err_out:
-   if (bridge-driver-needs_scratch_page 
-   bridge-driver-agp_unmap_page) {
+   if (bridge-driver-agp_unmap_page) {
bridge-driver-agp_unmap_page(bridge-scratch_page_page,
   bridge-scratch_page_dma);
}
 err_out_nounmap:
-   if (bridge-driver-needs_scratch_page) {
-   void *va = page_address(bridge-scratch_page_page);
+   va = page_address(bridge-scratch_page_page);
+   bridge-driver-agp_destroy_page(va, AGP_PAGE_DESTROY_UNMAP);
+   bridge-driver-agp_destroy_page(va

Re: [git pull] drm fixes

2010-03-30 Thread Jerome Glisse
On Tue, Mar 30, 2010 at 07:24:42AM -0700, Linus Torvalds wrote:
 
 
 On Tue, 30 Mar 2010, Dave Airlie wrote:
  
  Actually Linus, don't bother, consider this revoked, I'm going to kill 
  the GPU reset code and re-send this tomorrow, its just a mess to get it 
  back out of the tree at this point,
  
  but I realised I was falling back to the old ways, of putting things 
  with badness in, even if they helped a few people.
 
 Hey, thanks. 
 
   Linus

I will try to see if I can do a smaller version which only affects
r6xx/r7xx hw, where it's known to be helpful, while leaving other hw
unaffected, and work on more reliable code for 2.6.35 for these
older hw.

Cheers.
Jerome

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 1/7] drm/ttm: add pool wc/uc page allocator

2010-03-26 Thread Jerome Glisse
On Thu, Mar 25, 2010 at 12:36:43AM +0200, Pauli Nieminen wrote:
 On AGP system we might allocate/free routinely uncached or wc memory,
 changing page from cached (wb) to uc or wc is very expensive and involves
 a lot of flushing. To improve performance this allocator use a pool
 of uc,wc pages.
 
 Pools are protected with spinlocks to allow multiple threads to allocate pages
 simultanously. Expensive operations are done outside of spinlock to maximize
 concurrency.
 
 Pools are linked lists of pages that were recently freed. mm shrink callback
 allows kernel to claim back pages when they are required for something else.
 
 Based on Jerome Glisse's and Dave Airlie's pool allocator.
 
 Signed-off-by: Jerome Glisse jgli...@redhat.com
 Signed-off-by: Dave Airlie airl...@redhat.com
 Signed-off-by: Pauli Nieminen suok...@gmail.com

I think using array rather than list would have make things simplier.
Anyway this patchset looks good, couple of issues you need to fix first,
comment in the code.

Cheers,
Jerome

 ---
  drivers/gpu/drm/ttm/Makefile |2 +-
  drivers/gpu/drm/ttm/ttm_memory.c |7 +-
  drivers/gpu/drm/ttm/ttm_page_alloc.c |  718 
 ++
  drivers/gpu/drm/ttm/ttm_tt.c |   44 +--
  include/drm/ttm/ttm_page_alloc.h |   64 +++
  5 files changed, 810 insertions(+), 25 deletions(-)
  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
  create mode 100644 include/drm/ttm/ttm_page_alloc.h
 
 diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
 index 1e138f5..4256e20 100644
 --- a/drivers/gpu/drm/ttm/Makefile
 +++ b/drivers/gpu/drm/ttm/Makefile
 @@ -4,6 +4,6 @@
  ccflags-y := -Iinclude/drm
  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
   ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
 - ttm_object.o ttm_lock.o ttm_execbuf_util.o
 + ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
  
  obj-$(CONFIG_DRM_TTM) += ttm.o
 diff --git a/drivers/gpu/drm/ttm/ttm_memory.c 
 b/drivers/gpu/drm/ttm/ttm_memory.c
 index eb143e0..72f31aa 100644
 --- a/drivers/gpu/drm/ttm/ttm_memory.c
 +++ b/drivers/gpu/drm/ttm/ttm_memory.c
 @@ -27,6 +27,7 @@
  
  #include ttm/ttm_memory.h
  #include ttm/ttm_module.h
 +#include ttm/ttm_page_alloc.h
  #include linux/spinlock.h
  #include linux/sched.h
  #include linux/wait.h
 @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
  Zone %7s: Available graphics memory: %llu kiB.\n,
  zone-name, (unsigned long long) zone-max_mem  10);
   }
 + ttm_page_alloc_init(glob-zone_kernel-max_mem/(2*PAGE_SIZE));
   return 0;
  out_no_zone:
   ttm_mem_global_release(glob);
 @@ -406,6 +408,9 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
   unsigned int i;
   struct ttm_mem_zone *zone;
  
 + /* let the page allocator first stop the shrink work. */
 + ttm_page_alloc_fini();
 +
   flush_workqueue(glob-swap_queue);
   destroy_workqueue(glob-swap_queue);
   glob-swap_queue = NULL;
 @@ -413,7 +418,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
   zone = glob-zones[i];
   kobject_del(zone-kobj);
   kobject_put(zone-kobj);
 - }
 + }
   kobject_del(glob-kobj);
   kobject_put(glob-kobj);
  }
 diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
 b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 new file mode 100644
 index 000..18be14f
 --- /dev/null
 +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 @@ -0,0 +1,718 @@
 +/*
 + * Copyright (c) Red Hat Inc.
 +
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the Software),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sub license,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the
 + * next paragraph) shall be included in all copies or substantial portions
 + * of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 + * DEALINGS IN THE SOFTWARE.
 + *
 + * Authors: Dave Airlie airl...@redhat.com
 + *  Jerome Glisse jgli...@redhat.com
 + *  Pauli Nieminen suok...@gmail.com
 + */
 +
 +/* simple list based uncached page pool
 + * - Pool collects resently freed pages

[PATCH 07/13] drm/nouveau/kms: add support for new TTM fault callback V3

2010-03-25 Thread Jerome Glisse
This adds support for the new fault callback; it doesn't change
anything from the driver's point of view, though it should allow
nouveau to add support for unmappable VRAM.

Improvement: store the aperture base in a variable so that we don't
call a function to get it on each fault.

Patch hasn't been tested on any hw.

V2 don't derefence bo-mem.mm_node as it's not NULL only for
   VRAM or GTT
V3 update after io_mem_reserve/io_mem_free callback balancing

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |   51 ++
 1 files changed, 51 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 5a167de..dafb79c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -761,6 +761,54 @@ nouveau_bo_verify_access(struct ttm_buffer_object *bo, 
struct file *filp)
return 0;
 }
 
+static int
+nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
+   struct drm_device *dev = dev_priv-dev;
+
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   mem-bus.is_iomem = false;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* System memory */
+   return 0;
+   case TTM_PL_TT:
+#if __OS_HAS_AGP
+   if (dev_priv-gart_info.type == NOUVEAU_GART_AGP) {
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = dev_priv-gart_info.aper_base;
+   mem-bus.is_iomem = true;
+   }
+#endif
+   return 0;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = drm_get_resource_start(dev, 1);
+   mem-bus.is_iomem = true;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void
+nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+}
+
+static int
+nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+   return 0;
+}
+
 struct ttm_bo_driver nouveau_bo_driver = {
.create_ttm_backend_entry = nouveau_bo_create_ttm_backend_entry,
.invalidate_caches = nouveau_bo_invalidate_caches,
@@ -773,5 +821,8 @@ struct ttm_bo_driver nouveau_bo_driver = {
.sync_obj_flush = nouveau_fence_flush,
.sync_obj_unref = nouveau_fence_unref,
.sync_obj_ref = nouveau_fence_ref,
+   .fault_reserve_notify = nouveau_ttm_fault_reserve_notify,
+   .io_mem_reserve = nouveau_ttm_io_mem_reserve,
+   .io_mem_free = nouveau_ttm_io_mem_free,
 };
 
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


unmappable vram V6

2010-03-25 Thread Jerome Glisse
So in this patchset I use bool instead of atomic, dropped the
empty-line removal, and I hope addressed the standing issues. Again
only compile tested for nouveau and vmwgfx. This time tested only on
an RV710 with a special patch to force unmappable VRAM use.
http://people.freedesktop.org/~glisse/0014-TEST-UNMAPPABLE.patch

Seems to work flawlessly (quake3, compiz, glxgears, firefox running
side by side)

Cheers,
Jerome


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 12/13] drm/ttm: remove io_ field from TTM V4

2010-03-25 Thread Jerome Glisse
All TTM drivers have been converted to the new io_mem_reserve/free
interface, which allows a driver to choose and return the proper io
base and offset to core TTM for ioremapping if necessary. This
patch removes what is now dead code.

V2 adapt to match with change in first patch of the patchset
V3 update after io_mem_reserve/io_mem_free callback balancing
V4 adjust to minor cleanup

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |   22 --
 drivers/gpu/drm/ttm/ttm_bo_util.c |   29 +
 include/drm/ttm/ttm_bo_driver.h   |   10 --
 3 files changed, 9 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2171f86..1f27cf2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -79,8 +79,6 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, 
int mem_type)
printk(KERN_ERR TTM_PFX use_type: %d\n, man-use_type);
printk(KERN_ERR TTM_PFX flags: 0x%08X\n, man-flags);
printk(KERN_ERR TTM_PFX gpu_offset: 0x%08lX\n, man-gpu_offset);
-   printk(KERN_ERR TTM_PFX io_offset: 0x%08lX\n, man-io_offset);
-   printk(KERN_ERR TTM_PFX io_size: %ld\n, man-io_size);
printk(KERN_ERR TTM_PFX size: %llu\n, man-size);
printk(KERN_ERR TTM_PFX available_caching: 0x%08X\n,
man-available_caching);
@@ -1563,26 +1561,6 @@ bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, 
struct ttm_mem_reg *mem)
return true;
 }
 
-int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
- struct ttm_mem_reg *mem,
- unsigned long *bus_base,
- unsigned long *bus_offset, unsigned long *bus_size)
-{
-   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
-
-   *bus_size = 0;
-   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
-   return -EINVAL;
-
-   if (ttm_mem_reg_is_pci(bdev, mem)) {
-   *bus_offset = mem-mm_node-start  PAGE_SHIFT;
-   *bus_size = mem-num_pages  PAGE_SHIFT;
-   *bus_base = man-io_offset + (uintptr_t)man-io_addr;
-   }
-
-   return 0;
-}
-
 void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 {
struct ttm_bo_device *bdev = bo-bdev;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 878dc49..9f9b287 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -83,31 +83,20 @@ EXPORT_SYMBOL(ttm_bo_move_ttm);
 
 int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
-   int ret;
+   int ret = 0;
 
-   if (bdev-driver-io_mem_reserve) {
-   if (!mem-bus.io_reserved) {
-   mem-bus.io_reserved = true;
-   ret = bdev-driver-io_mem_reserve(bdev, mem);
-   if (unlikely(ret != 0))
-   return ret;
-   }
-   } else {
-   ret = ttm_bo_pci_offset(bdev, mem, mem-bus.base, 
mem-bus.offset, mem-bus.size);
-   if (unlikely(ret != 0))
-   return ret;
-   mem-bus.is_iomem = (mem-bus.size  0) ? 1 : 0;
+   if (!mem-bus.io_reserved) {
+   mem-bus.io_reserved = true;
+   ret = bdev-driver-io_mem_reserve(bdev, mem);
}
-   return 0;
+   return ret;
 }
 
 void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
-   if (bdev-driver-io_mem_reserve) {
-   if (mem-bus.io_reserved) {
-   mem-bus.io_reserved = false;
-   bdev-driver-io_mem_free(bdev, mem);
-   }
+   if (mem-bus.io_reserved) {
+   mem-bus.io_reserved = false;
+   bdev-driver-io_mem_free(bdev, mem);
}
 }
 
@@ -411,7 +400,7 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
 
if (!(man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP)) {
map-bo_kmap_type = ttm_bo_map_premapped;
-   map-virtual = (void *)(((u8 *) man-io_addr) + bus_offset);
+   map-virtual = (void *)(bus_base + bus_offset);
} else {
map-bo_kmap_type = ttm_bo_map_iomap;
if (mem-placement  TTM_PL_FLAG_WC)
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index da39865..35e2b9f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -189,13 +189,6 @@ struct ttm_tt {
  * managed by this memory type.
  * @gpu_offset: If used, the GPU offset of the first managed page of
  * fixed memory or the first managed location in an aperture.
- * @io_offset: The io_offset of the first managed page of IO memory or
- * the first managed location in an aperture. For TTM_MEMTYPE_FLAG_CMA
- * memory, this should be set to NULL.
- * @io_size: The size of a managed IO region (fixed memory

[PATCH 11/13] drm/vmwgfx: don't initialize TTM io memory manager field

2010-03-25 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |6 --
 1 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 7e28448..a0fb612 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -137,9 +137,6 @@ int vmw_invalidate_caches(struct ttm_bo_device *bdev, 
uint32_t flags)
 int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
  struct ttm_mem_type_manager *man)
 {
-   struct vmw_private *dev_priv =
-   container_of(bdev, struct vmw_private, bdev);
-
switch (type) {
case TTM_PL_SYSTEM:
/* System memory */
@@ -151,11 +148,8 @@ int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t 
type,
case TTM_PL_VRAM:
/* On-card video ram */
man-gpu_offset = 0;
-   man-io_offset = dev_priv-vram_start;
-   man-io_size = dev_priv-vram_size;
man-flags = TTM_MEMTYPE_FLAG_FIXED |
TTM_MEMTYPE_FLAG_NEEDS_IOREMAP | TTM_MEMTYPE_FLAG_MAPPABLE;
-   man-io_addr = NULL;
man-available_caching = TTM_PL_MASK_CACHING;
man-default_caching = TTM_PL_FLAG_WC;
break;
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 04/13] drm/vmwgfx: update to TTM no_wait splitted argument

2010-03-25 Thread Jerome Glisse
This patch updates vmwgfx to the new split no_wait argument
TTM functionality.

Compile tested only (but things should run, as there is no
operational change from the driver's point of view)

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c |4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c  |4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c |2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 0897359..dbd36b8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -570,7 +570,7 @@ static int vmw_validate_single_buffer(struct vmw_private 
*dev_priv,
 * Put BO in VRAM, only if there is space.
 */
 
-   ret = ttm_bo_validate(bo, vmw_vram_sys_placement, true, false);
+   ret = ttm_bo_validate(bo, vmw_vram_sys_placement, true, false, false);
if (unlikely(ret == -ERESTARTSYS))
return ret;
 
@@ -590,7 +590,7 @@ static int vmw_validate_single_buffer(struct vmw_private 
*dev_priv,
 * previous contents.
 */
 
-   ret = ttm_bo_validate(bo, vmw_vram_placement, true, false);
+   ret = ttm_bo_validate(bo, vmw_vram_placement, true, false, false);
return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index a933670..80125ff 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -628,7 +628,7 @@ int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
if (unlikely(ret != 0))
return ret;
 
-   ret = ttm_bo_validate(bo, vmw_sys_placement, false, false);
+   ret = ttm_bo_validate(bo, vmw_sys_placement, false, false, false);
ttm_bo_unreserve(bo);
 
return ret;
@@ -652,7 +652,7 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private 
*vmw_priv,
if (unlikely(ret != 0))
goto err_unlock;
 
-   ret = ttm_bo_validate(bo, ne_placement, false, false);
+   ret = ttm_bo_validate(bo, ne_placement, false, false, false);
ttm_bo_unreserve(bo);
 err_unlock:
ttm_write_unlock(vmw_priv-active_master-lock);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 5b6eabe..ad566c8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -118,7 +118,7 @@ static int vmw_dmabuf_pin_in_vram(struct vmw_private 
*dev_priv,
if (pin)
overlay_placement = vmw_vram_ne_placement;
 
-   ret = ttm_bo_validate(bo, overlay_placement, interruptible, false);
+   ret = ttm_bo_validate(bo, overlay_placement, interruptible, false, 
false);
 
ttm_bo_unreserve(bo);
 
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 08/13] drm/vmwgfx: add support for new TTM fault callback V3

2010-03-25 Thread Jerome Glisse
This adds support for the new fault callback; it doesn't change anything
from the driver's point of view.

Improvement: store the aperture base in a variable so that we don't
call a function to get it on each fault.

Patch hasn't been tested.

V2 don't derefence bo-mem.mm_node as it's not NULL only for
   VRAM or GTT
V3 update after io_mem_reserve/io_mem_free callback balancing

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |   40 +++-
 1 files changed, 39 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 825ebe3..7e28448 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -193,6 +193,41 @@ static void vmw_swap_notify(struct ttm_buffer_object *bo)
vmw_dmabuf_gmr_unbind(bo);
 }
 
+static int vmw_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct vmw_private *dev_priv = container_of(bdev, struct vmw_private, 
bdev);
+
+   mem-bus.is_iomem = false;
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* System memory */
+   return 0;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = dev_priv-vram_start;
+   mem-bus.is_iomem = true;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void vmw_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg 
*mem)
+{
+}
+
+static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+   return 0;
+}
+
 /**
  * FIXME: We're using the old vmware polling method to sync.
  * Do this with fences instead.
@@ -248,5 +283,8 @@ struct ttm_bo_driver vmw_bo_driver = {
.sync_obj_unref = vmw_sync_obj_unref,
.sync_obj_ref = vmw_sync_obj_ref,
.move_notify = vmw_move_notify,
-   .swap_notify = vmw_swap_notify
+   .swap_notify = vmw_swap_notify,
+   .fault_reserve_notify = vmw_ttm_fault_reserve_notify,
+   .io_mem_reserve = vmw_ttm_io_mem_reserve,
+   .io_mem_free = vmw_ttm_io_mem_free,
 };
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 10/13] drm/nouveau/kms: don't initialize TTM io memory manager field

2010-03-25 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |   11 ---
 1 files changed, 0 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index dafb79c..9812d1c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -391,13 +391,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-
-   man-io_addr = NULL;
-   man-io_offset = drm_get_resource_start(dev, 1);
-   man-io_size = drm_get_resource_len(dev, 1);
-   if (man-io_size  nouveau_mem_fb_amount(dev))
-   man-io_size = nouveau_mem_fb_amount(dev);
-
man-gpu_offset = dev_priv-vm_vram_base;
break;
case TTM_PL_TT:
@@ -419,10 +412,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
 dev_priv-gart_info.type);
return -EINVAL;
}
-
-   man-io_offset  = dev_priv-gart_info.aper_base;
-   man-io_size= dev_priv-gart_info.aper_size;
-   man-io_addr   = NULL;
man-gpu_offset = dev_priv-vm_gart_base;
break;
default:
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 06/13] drm/radeon/kms: add support for new fault callback V5

2010-03-25 Thread Jerome Glisse
This adds support for the new fault callback and also the
infrastructure for supporting unmappable VRAM.

V2 validate BO with no_wait = true
V3 don't derefence bo-mem.mm_node as it's not NULL only for
   VRAM or GTT
V4 update to splitted no_wait ttm change
V5 update to new balanced io_mem_reserve/free change

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_object.c |   26 -
 drivers/gpu/drm/radeon/radeon_object.h |2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c|   47 +++-
 3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index 3adfa88..aef44f3 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -496,11 +496,33 @@ void radeon_bo_move_notify(struct ttm_buffer_object *bo,
radeon_bo_check_tiling(rbo, 0, 1);
 }
 
-void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
+   struct radeon_device *rdev;
struct radeon_bo *rbo;
+   unsigned long offset, size;
+   int r;
+
if (!radeon_ttm_bo_is_radeon_bo(bo))
-   return;
+   return 0;
rbo = container_of(bo, struct radeon_bo, tbo);
radeon_bo_check_tiling(rbo, 0, 0);
+   rdev = rbo-rdev;
+   if (bo-mem.mem_type == TTM_PL_VRAM) {
+   size = bo-mem.num_pages  PAGE_SHIFT;
+   offset = bo-mem.mm_node-start  PAGE_SHIFT;
+   if ((offset + size)  rdev-mc.visible_vram_size) {
+   /* hurrah the memory is not visible ! */
+   radeon_ttm_placement_from_domain(rbo, 
RADEON_GEM_DOMAIN_VRAM);
+   rbo-placement.lpfn = rdev-mc.visible_vram_size  
PAGE_SHIFT;
+   r = ttm_bo_validate(bo, rbo-placement, false, true, 
false);
+   if (unlikely(r != 0))
+   return r;
+   offset = bo-mem.mm_node-start  PAGE_SHIFT;
+   /* this should not happen */
+   if ((offset + size)  rdev-mc.visible_vram_size)
+   return -EINVAL;
+   }
+   }
+   return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_object.h 
b/drivers/gpu/drm/radeon/radeon_object.h
index 7ab43de..353998d 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -168,6 +168,6 @@ extern int radeon_bo_check_tiling(struct radeon_bo *bo, 
bool has_moved,
bool force_drop);
 extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem);
-extern void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
+extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index ba4724c..d2700b8 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -437,10 +437,53 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 memcpy:
r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, 
new_mem);
}
-
return r;
 }
 
+static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct radeon_device *rdev = radeon_get_rdev(bdev);
+
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   mem-bus.is_iomem = false;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* system memory */
+   return 0;
+   case TTM_PL_TT:
+#if __OS_HAS_AGP
+   if (rdev-flags  RADEON_IS_AGP) {
+   /* RADEON_IS_AGP is set only if AGP is active */
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = rdev-mc.agp_base;
+   mem-bus.is_iomem = true;
+   }
+#endif
+   return 0;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   /* check if it's visible */
+   if ((mem-bus.offset + mem-bus.size)  
rdev-mc.visible_vram_size)
+   return -EINVAL;
+   mem-bus.base = rdev-mc.aper_base;
+   mem-bus.is_iomem = true;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem

[PATCH 02/13] drm/radeon/kms: update to TTM no_wait splitted argument

2010-03-25 Thread Jerome Glisse
This patch updates radeon to the new split no_wait argument
TTM functionality.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_object.c |6 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c|   39 +--
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index fc9d00a..3adfa88 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -189,7 +189,7 @@ int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 
*gpu_addr)
bo-placement.lpfn = bo-rdev-mc.visible_vram_size  PAGE_SHIFT;
for (i = 0; i  bo-placement.num_placement; i++)
bo-placements[i] |= TTM_PL_FLAG_NO_EVICT;
-   r = ttm_bo_validate(bo-tbo, bo-placement, false, false);
+   r = ttm_bo_validate(bo-tbo, bo-placement, false, false, false);
if (likely(r == 0)) {
bo-pin_count = 1;
if (gpu_addr != NULL)
@@ -213,7 +213,7 @@ int radeon_bo_unpin(struct radeon_bo *bo)
return 0;
for (i = 0; i  bo-placement.num_placement; i++)
bo-placements[i] = ~TTM_PL_FLAG_NO_EVICT;
-   r = ttm_bo_validate(bo-tbo, bo-placement, false, false);
+   r = ttm_bo_validate(bo-tbo, bo-placement, false, false, false);
if (unlikely(r != 0))
dev_err(bo-rdev-dev, %p validate failed for unpin\n, bo);
return r;
@@ -328,7 +328,7 @@ int radeon_bo_list_validate(struct list_head *head)
lobj-rdomain);
}
r = ttm_bo_validate(bo-tbo, bo-placement,
-   true, false);
+   true, false, false);
if (unlikely(r))
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 43c5ab3..ba4724c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -243,9 +243,9 @@ static void radeon_move_null(struct ttm_buffer_object *bo,
 }
 
 static int radeon_move_blit(struct ttm_buffer_object *bo,
-   bool evict, int no_wait,
-   struct ttm_mem_reg *new_mem,
-   struct ttm_mem_reg *old_mem)
+   bool evict, int no_wait_reserve, bool no_wait_gpu,
+   struct ttm_mem_reg *new_mem,
+   struct ttm_mem_reg *old_mem)
 {
struct radeon_device *rdev;
uint64_t old_start, new_start;
@@ -289,13 +289,14 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
r = radeon_copy(rdev, old_start, new_start, new_mem-num_pages, fence);
/* FIXME: handle copy error */
r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
- evict, no_wait, new_mem);
+ evict, no_wait_reserve, no_wait_gpu, 
new_mem);
radeon_fence_unref(fence);
return r;
 }
 
 static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible, bool no_wait,
+   bool evict, bool interruptible,
+   bool no_wait_reserve, bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
 {
struct radeon_device *rdev;
@@ -316,7 +317,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
placement.busy_placement = placements;
placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, placement, tmp_mem,
-interruptible, no_wait);
+interruptible, no_wait_reserve, no_wait_gpu);
if (unlikely(r)) {
return r;
}
@@ -330,11 +331,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
if (unlikely(r)) {
goto out_cleanup;
}
-   r = radeon_move_blit(bo, true, no_wait, tmp_mem, old_mem);
+   r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, tmp_mem, 
old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
-   r = ttm_bo_move_ttm(bo, true, no_wait, new_mem);
+   r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
 out_cleanup:
if (tmp_mem.mm_node) {
struct ttm_bo_global *glob = rdev-mman.bdev.glob;
@@ -348,7 +349,8 @@ out_cleanup:
 }
 
 static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible, bool no_wait,
+   bool evict, bool interruptible,
+   bool no_wait_reserve, bool no_wait_gpu

[PATCH 03/13] drm/nouveau: update to TTM no_wait splitted argument

2010-03-25 Thread Jerome Glisse
This patch updates nouveau to the new split no_wait argument
TTM functionality.

Compile tested only (but things should run, as there is no
operational change from the driver's point of view)

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  |   45 ++--
 drivers/gpu/drm/nouveau/nouveau_gem.c |2 +-
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0266124..5a167de 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -219,7 +219,7 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype)
for (i = 0; i  nvbo-placement.num_placement; i++)
nvbo-placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-   ret = ttm_bo_validate(bo, nvbo-placement, false, false);
+   ret = ttm_bo_validate(bo, nvbo-placement, false, false, false);
if (ret == 0) {
switch (bo-mem.mem_type) {
case TTM_PL_VRAM:
@@ -256,7 +256,7 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo)
for (i = 0; i  nvbo-placement.num_placement; i++)
nvbo-placements[i] = ~TTM_PL_FLAG_NO_EVICT;
 
-   ret = ttm_bo_validate(bo, nvbo-placement, false, false);
+   ret = ttm_bo_validate(bo, nvbo-placement, false, false, false);
if (ret == 0) {
switch (bo-mem.mem_type) {
case TTM_PL_VRAM:
@@ -456,7 +456,8 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct 
ttm_placement *pl)
 
 static int
 nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
- struct nouveau_bo *nvbo, bool evict, bool no_wait,
+ struct nouveau_bo *nvbo, bool evict,
+ bool no_wait_reserve, bool no_wait_gpu,
  struct ttm_mem_reg *new_mem)
 {
struct nouveau_fence *fence = NULL;
@@ -467,7 +468,7 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
return ret;
 
ret = ttm_bo_move_accel_cleanup(nvbo-bo, fence, NULL,
-   evict, no_wait, new_mem);
+   evict, no_wait_reserve, no_wait_gpu, 
new_mem);
if (nvbo-channel  nvbo-channel != chan)
ret = nouveau_fence_wait(fence, NULL, false, false);
nouveau_fence_unref((void *)fence);
@@ -491,7 +492,8 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct 
nouveau_channel *chan,
 
 static int
 nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-int no_wait, struct ttm_mem_reg *new_mem)
+bool no_wait_reserve, bool no_wait_gpu,
+struct ttm_mem_reg *new_mem)
 {
struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo-bdev);
@@ -569,12 +571,13 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int 
evict, bool intr,
dst_offset += (PAGE_SIZE * line_count);
}
 
-   return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, no_wait, 
new_mem);
+   return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, 
no_wait_reserve, no_wait_gpu, new_mem);
 }
 
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
 {
u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
struct ttm_placement placement;
@@ -587,7 +590,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
 
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
-   ret = ttm_bo_mem_space(bo, placement, tmp_mem, intr, no_wait);
+   ret = ttm_bo_mem_space(bo, placement, tmp_mem, intr, no_wait_reserve, 
no_wait_gpu);
if (ret)
return ret;
 
@@ -595,11 +598,11 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
if (ret)
goto out;
 
-   ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, tmp_mem);
+   ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, 
no_wait_gpu, tmp_mem);
if (ret)
goto out;
 
-   ret = ttm_bo_move_ttm(bo, evict, no_wait, new_mem);
+   ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
 out:
if (tmp_mem.mm_node) {
spin_lock(bo-bdev-glob-lru_lock);
@@ -612,7 +615,8 @@ out:
 
 static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
 {
u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING

[PATCH 09/13] drm/radeon/kms: don't initialize TTM io memory manager field

2010-03-25 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_ttm.c |   13 +
 1 files changed, 1 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index d2700b8..0220154 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -161,22 +161,14 @@ static int radeon_init_mem_type(struct ttm_bo_device 
*bdev, uint32_t type,
  (unsigned)type);
return -EINVAL;
}
-   man-io_offset = rdev-mc.agp_base;
-   man-io_size = rdev-mc.gtt_size;
-   man-io_addr = NULL;
if (!rdev-ddev-agp-cant_use_aperture)
man-flags = TTM_MEMTYPE_FLAG_NEEDS_IOREMAP |
 TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-   } else
-#endif
-   {
-   man-io_offset = 0;
-   man-io_size = 0;
-   man-io_addr = NULL;
}
+#endif
break;
case TTM_PL_VRAM:
/* On-card video ram */
@@ -186,9 +178,6 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
 TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-   man-io_addr = NULL;
-   man-io_offset = rdev-mc.aper_base;
-   man-io_size = rdev-mc.aper_size;
break;
default:
DRM_ERROR(Unsupported memory type %u\n, (unsigned)type);
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 01/13] drm/ttm: split no_wait argument in 2 GPU or reserve wait

2010-03-25 Thread Jerome Glisse
There are cases where we want to be able to wait only for the
GPU while not waiting for other buffers to be unreserved. This
patch splits the no_wait argument all the way down the whole
ttm path so that the upper levels can decide what to wait on
and what not.

This patch breaks the API to other modules; updates to the other
drivers follow in separate patches.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |   57 
 drivers/gpu/drm/ttm/ttm_bo_util.c |9 --
 include/drm/ttm/ttm_bo_api.h  |6 ++-
 include/drm/ttm/ttm_bo_driver.h   |   29 +++---
 4 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9db02bb..6f51b30 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -357,7 +357,8 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
 
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
  struct ttm_mem_reg *mem,
- bool evict, bool interruptible, bool no_wait)
+ bool evict, bool interruptible,
+ bool no_wait_reserve, bool no_wait_gpu)
 {
struct ttm_bo_device *bdev = bo-bdev;
bool old_is_pci = ttm_mem_reg_is_pci(bdev, bo-mem);
@@ -402,12 +403,12 @@ static int ttm_bo_handle_move_mem(struct 
ttm_buffer_object *bo,
 
if (!(old_man-flags  TTM_MEMTYPE_FLAG_FIXED) 
!(new_man-flags  TTM_MEMTYPE_FLAG_FIXED))
-   ret = ttm_bo_move_ttm(bo, evict, no_wait, mem);
+   ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, 
mem);
else if (bdev-driver-move)
ret = bdev-driver-move(bo, evict, interruptible,
-no_wait, mem);
+no_wait_reserve, no_wait_gpu, mem);
else
-   ret = ttm_bo_move_memcpy(bo, evict, no_wait, mem);
+   ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, 
no_wait_gpu, mem);
 
if (ret)
goto out_err;
@@ -606,7 +607,7 @@ void ttm_bo_unref(struct ttm_buffer_object **p_bo)
 EXPORT_SYMBOL(ttm_bo_unref);
 
 static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
-   bool no_wait)
+   bool no_wait_reserve, bool no_wait_gpu)
 {
struct ttm_bo_device *bdev = bo-bdev;
struct ttm_bo_global *glob = bo-glob;
@@ -615,7 +616,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
int ret = 0;
 
spin_lock(bo-lock);
-   ret = ttm_bo_wait(bo, false, interruptible, no_wait);
+   ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
spin_unlock(bo-lock);
 
if (unlikely(ret != 0)) {
@@ -638,7 +639,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
placement.num_busy_placement = 0;
bdev-driver-evict_flags(bo, placement);
ret = ttm_bo_mem_space(bo, placement, evict_mem, interruptible,
-   no_wait);
+   no_wait_reserve, no_wait_gpu);
if (ret) {
if (ret != -ERESTARTSYS) {
printk(KERN_ERR TTM_PFX
@@ -650,7 +651,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
}
 
ret = ttm_bo_handle_move_mem(bo, evict_mem, true, interruptible,
-no_wait);
+no_wait_reserve, no_wait_gpu);
if (ret) {
if (ret != -ERESTARTSYS)
printk(KERN_ERR TTM_PFX Buffer eviction failed\n);
@@ -670,7 +671,8 @@ out:
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
uint32_t mem_type,
-   bool interruptible, bool no_wait)
+   bool interruptible, bool no_wait_reserve,
+   bool no_wait_gpu)
 {
struct ttm_bo_global *glob = bdev-glob;
struct ttm_mem_type_manager *man = bdev-man[mem_type];
@@ -687,11 +689,11 @@ retry:
bo = list_first_entry(man-lru, struct ttm_buffer_object, lru);
kref_get(bo-list_kref);
 
-   ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+   ret = ttm_bo_reserve_locked(bo, false, no_wait_reserve, false, 0);
 
if (unlikely(ret == -EBUSY)) {
spin_unlock(glob-lru_lock);
-   if (likely(!no_wait))
+   if (likely(!no_wait_gpu))
ret = ttm_bo_wait_unreserved(bo, interruptible);
 
kref_put(bo-list_kref, ttm_bo_release_list);
@@ -713,7 +715,7 @@ retry:
while (put_count--)
kref_put(bo-list_kref, ttm_bo_ref_bug);
 
-   ret = ttm_bo_evict(bo

[PATCH 13/13] drm/radeon/kms: enable use of unmappable VRAM V2

2010-03-25 Thread Jerome Glisse
This patch enables the use of unmappable VRAM thanks to the
previous TTM infrastructure changes.

V2 update after io_mem_reserve/io_mem_free callback balancing

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/evergreen.c |5 -
 drivers/gpu/drm/radeon/r100.c  |5 -
 drivers/gpu/drm/radeon/r600.c  |5 -
 drivers/gpu/drm/radeon/rv770.c |5 -
 4 files changed, 0 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 7672f11..48e0dfb 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -475,11 +475,6 @@ int evergreen_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
radeon_update_bandwidth_info(rdev);
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7a1180d..fc95907 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2026,11 +2026,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
else
rdev-mc.mc_vram_size = rdev-mc.real_vram_size;
}
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
 }
 
 void r100_vga_set_state(struct radeon_device *rdev, bool state)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index a948947..4206c8d 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -713,11 +713,6 @@ int r600_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
 
if (rdev-flags  RADEON_IS_IGP)
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 2b8a4e1..599121b 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -905,11 +905,6 @@ int rv770_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
radeon_update_bandwidth_info(rdev);
 
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 05/13] drm/ttm: ttm_fault callback to allow driver to handle bo placement V4

2010-03-25 Thread Jerome Glisse
On fault the driver is given the opportunity to perform any operation
it sees fit in order to place the buffer into a CPU visible area of
memory. This patch doesn't break TTM users, nouveau, vmwgfx and radeon
should keep working properly. Future patch will take advantage of this
infrastructure and remove the old path from TTM once driver are
converted.

V2 return VM_FAULT_NOPAGE if callback return -EBUSY or -ERESTARTSYS
V3 balance io_mem_reserve and io_mem_free call, fault_reserve_notify
   is responsible to perform any necessary task for mapping to succeed
V4 minor cleanup, atomic_t -> bool as member is protected by the reserve
   mechanism from concurrent access

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |7 ++-
 drivers/gpu/drm/ttm/ttm_bo_util.c |   98 ++--
 drivers/gpu/drm/ttm/ttm_bo_vm.c   |   41 
 include/drm/ttm/ttm_bo_api.h  |   21 
 include/drm/ttm/ttm_bo_driver.h   |   16 ++-
 5 files changed, 111 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6f51b30..2171f86 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -632,6 +632,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
 
evict_mem = bo-mem;
evict_mem.mm_node = NULL;
+   evict_mem.bus.io_reserved = false;
 
placement.fpfn = 0;
placement.lpfn = 0;
@@ -1005,6 +1006,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
mem.num_pages = bo-num_pages;
mem.size = mem.num_pages  PAGE_SHIFT;
mem.page_alignment = bo-mem.page_alignment;
+   mem.bus.io_reserved = false;
/*
 * Determine where to move the buffer.
 */
@@ -1160,6 +1162,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
bo-mem.num_pages = bo-num_pages;
bo-mem.mm_node = NULL;
bo-mem.page_alignment = page_alignment;
+   bo-mem.bus.io_reserved = false;
bo-buffer_start = buffer_start  PAGE_MASK;
bo-priv_flags = 0;
bo-mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
@@ -1574,7 +1577,7 @@ int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
if (ttm_mem_reg_is_pci(bdev, mem)) {
*bus_offset = mem-mm_node-start  PAGE_SHIFT;
*bus_size = mem-num_pages  PAGE_SHIFT;
-   *bus_base = man-io_offset;
+   *bus_base = man-io_offset + (uintptr_t)man-io_addr;
}
 
return 0;
@@ -1588,8 +1591,8 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 
if (!bdev-dev_mapping)
return;
-
unmap_mapping_range(bdev-dev_mapping, offset, holelen, 1);
+   ttm_mem_io_free(bdev, bo-mem);
 }
 EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 865b2a8..878dc49 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -81,30 +81,59 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_move_ttm);
 
+int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   int ret;
+
+   if (bdev-driver-io_mem_reserve) {
+   if (!mem-bus.io_reserved) {
+   mem-bus.io_reserved = true;
+   ret = bdev-driver-io_mem_reserve(bdev, mem);
+   if (unlikely(ret != 0))
+   return ret;
+   }
+   } else {
+   ret = ttm_bo_pci_offset(bdev, mem, mem-bus.base, 
mem-bus.offset, mem-bus.size);
+   if (unlikely(ret != 0))
+   return ret;
+   mem-bus.is_iomem = (mem-bus.size  0) ? 1 : 0;
+   }
+   return 0;
+}
+
+void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   if (bdev-driver-io_mem_reserve) {
+   if (mem-bus.io_reserved) {
+   mem-bus.io_reserved = false;
+   bdev-driver-io_mem_free(bdev, mem);
+   }
+   }
+}
+
 int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
void **virtual)
 {
struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
-   unsigned long bus_offset;
-   unsigned long bus_size;
-   unsigned long bus_base;
int ret;
void *addr;
 
*virtual = NULL;
-   ret = ttm_bo_pci_offset(bdev, mem, bus_base, bus_offset, bus_size);
-   if (ret || bus_size == 0)
+   ret = ttm_mem_io_reserve(bdev, mem);
+   if (ret)
return ret;
 
-   if (!(man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP))
-   addr = (void *)(((u8 *) man-io_addr) + bus_offset);
-   else {
+   if (!(man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP)) {
+   addr = (void *)(mem-bus.base + mem-bus.offset);
+   } else

unmappable vram V5

2010-03-24 Thread Jerome Glisse
I think I have addressed all concerns.

The patch doesn't follow the 80-column limit, as there was discussion
about dropping that on lkml.

Only compile-tested on nouveau & vmwgfx.

Sorry for the short mail, but I wrote a much longer one which got
lost ... I don't feel like rewriting it.

Cheers,
Jerome


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 04/14] drm/vmwgfx: update to TTM no_wait splitted argument

2010-03-24 Thread Jerome Glisse
This patch updates vmwgfx to the new split no_wait argument
TTM functionality.

Compile tested only (but things should run as there is no
operational change from the driver's point of view)

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c |4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c  |4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c |2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 0897359..dbd36b8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -570,7 +570,7 @@ static int vmw_validate_single_buffer(struct vmw_private 
*dev_priv,
 * Put BO in VRAM, only if there is space.
 */
 
-   ret = ttm_bo_validate(bo, vmw_vram_sys_placement, true, false);
+   ret = ttm_bo_validate(bo, vmw_vram_sys_placement, true, false, false);
if (unlikely(ret == -ERESTARTSYS))
return ret;
 
@@ -590,7 +590,7 @@ static int vmw_validate_single_buffer(struct vmw_private 
*dev_priv,
 * previous contents.
 */
 
-   ret = ttm_bo_validate(bo, vmw_vram_placement, true, false);
+   ret = ttm_bo_validate(bo, vmw_vram_placement, true, false, false);
return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index a933670..80125ff 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -628,7 +628,7 @@ int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
if (unlikely(ret != 0))
return ret;
 
-   ret = ttm_bo_validate(bo, vmw_sys_placement, false, false);
+   ret = ttm_bo_validate(bo, vmw_sys_placement, false, false, false);
ttm_bo_unreserve(bo);
 
return ret;
@@ -652,7 +652,7 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private 
*vmw_priv,
if (unlikely(ret != 0))
goto err_unlock;
 
-   ret = ttm_bo_validate(bo, ne_placement, false, false);
+   ret = ttm_bo_validate(bo, ne_placement, false, false, false);
ttm_bo_unreserve(bo);
 err_unlock:
ttm_write_unlock(vmw_priv-active_master-lock);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 5b6eabe..ad566c8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -118,7 +118,7 @@ static int vmw_dmabuf_pin_in_vram(struct vmw_private 
*dev_priv,
if (pin)
overlay_placement = vmw_vram_ne_placement;
 
-   ret = ttm_bo_validate(bo, overlay_placement, interruptible, false);
+   ret = ttm_bo_validate(bo, overlay_placement, interruptible, false, 
false);
 
ttm_bo_unreserve(bo);
 
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 02/14] drm/radeon/kms: update to TTM no_wait splitted argument

2010-03-24 Thread Jerome Glisse
This patch updates radeon to the new split no_wait argument
TTM functionality.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_object.c |6 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c|   39 +--
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index fc9d00a..3adfa88 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -189,7 +189,7 @@ int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 
*gpu_addr)
bo-placement.lpfn = bo-rdev-mc.visible_vram_size  PAGE_SHIFT;
for (i = 0; i  bo-placement.num_placement; i++)
bo-placements[i] |= TTM_PL_FLAG_NO_EVICT;
-   r = ttm_bo_validate(bo-tbo, bo-placement, false, false);
+   r = ttm_bo_validate(bo-tbo, bo-placement, false, false, false);
if (likely(r == 0)) {
bo-pin_count = 1;
if (gpu_addr != NULL)
@@ -213,7 +213,7 @@ int radeon_bo_unpin(struct radeon_bo *bo)
return 0;
for (i = 0; i  bo-placement.num_placement; i++)
bo-placements[i] = ~TTM_PL_FLAG_NO_EVICT;
-   r = ttm_bo_validate(bo-tbo, bo-placement, false, false);
+   r = ttm_bo_validate(bo-tbo, bo-placement, false, false, false);
if (unlikely(r != 0))
dev_err(bo-rdev-dev, %p validate failed for unpin\n, bo);
return r;
@@ -328,7 +328,7 @@ int radeon_bo_list_validate(struct list_head *head)
lobj-rdomain);
}
r = ttm_bo_validate(bo-tbo, bo-placement,
-   true, false);
+   true, false, false);
if (unlikely(r))
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 43c5ab3..ba4724c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -243,9 +243,9 @@ static void radeon_move_null(struct ttm_buffer_object *bo,
 }
 
 static int radeon_move_blit(struct ttm_buffer_object *bo,
-   bool evict, int no_wait,
-   struct ttm_mem_reg *new_mem,
-   struct ttm_mem_reg *old_mem)
+   bool evict, int no_wait_reserve, bool no_wait_gpu,
+   struct ttm_mem_reg *new_mem,
+   struct ttm_mem_reg *old_mem)
 {
struct radeon_device *rdev;
uint64_t old_start, new_start;
@@ -289,13 +289,14 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
r = radeon_copy(rdev, old_start, new_start, new_mem-num_pages, fence);
/* FIXME: handle copy error */
r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
- evict, no_wait, new_mem);
+ evict, no_wait_reserve, no_wait_gpu, 
new_mem);
radeon_fence_unref(fence);
return r;
 }
 
 static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible, bool no_wait,
+   bool evict, bool interruptible,
+   bool no_wait_reserve, bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
 {
struct radeon_device *rdev;
@@ -316,7 +317,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
placement.busy_placement = placements;
placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, placement, tmp_mem,
-interruptible, no_wait);
+interruptible, no_wait_reserve, no_wait_gpu);
if (unlikely(r)) {
return r;
}
@@ -330,11 +331,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
if (unlikely(r)) {
goto out_cleanup;
}
-   r = radeon_move_blit(bo, true, no_wait, tmp_mem, old_mem);
+   r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, tmp_mem, 
old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
-   r = ttm_bo_move_ttm(bo, true, no_wait, new_mem);
+   r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
 out_cleanup:
if (tmp_mem.mm_node) {
struct ttm_bo_global *glob = rdev-mman.bdev.glob;
@@ -348,7 +349,8 @@ out_cleanup:
 }
 
 static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
-   bool evict, bool interruptible, bool no_wait,
+   bool evict, bool interruptible,
+   bool no_wait_reserve, bool no_wait_gpu

[PATCH 06/14] drm/radeon/kms: add support for new fault callback V5

2010-03-24 Thread Jerome Glisse
This adds support for the new fault callback and also the
infrastructure for supporting unmappable VRAM.

V2 validate BO with no_wait = true
V3 don't dereference bo->mem.mm_node as it's non-NULL only for
   VRAM or GTT
V4 update to split no_wait ttm change
V5 update to new balanced io_mem_reserve/free change

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_object.c |   26 -
 drivers/gpu/drm/radeon/radeon_object.h |2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c|   47 +++-
 3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index 3adfa88..aef44f3 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -496,11 +496,33 @@ void radeon_bo_move_notify(struct ttm_buffer_object *bo,
radeon_bo_check_tiling(rbo, 0, 1);
 }
 
-void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
+   struct radeon_device *rdev;
struct radeon_bo *rbo;
+   unsigned long offset, size;
+   int r;
+
if (!radeon_ttm_bo_is_radeon_bo(bo))
-   return;
+   return 0;
rbo = container_of(bo, struct radeon_bo, tbo);
radeon_bo_check_tiling(rbo, 0, 0);
+   rdev = rbo-rdev;
+   if (bo-mem.mem_type == TTM_PL_VRAM) {
+   size = bo-mem.num_pages  PAGE_SHIFT;
+   offset = bo-mem.mm_node-start  PAGE_SHIFT;
+   if ((offset + size)  rdev-mc.visible_vram_size) {
+   /* hurrah the memory is not visible ! */
+   radeon_ttm_placement_from_domain(rbo, 
RADEON_GEM_DOMAIN_VRAM);
+   rbo-placement.lpfn = rdev-mc.visible_vram_size  
PAGE_SHIFT;
+   r = ttm_bo_validate(bo, rbo-placement, false, true, 
false);
+   if (unlikely(r != 0))
+   return r;
+   offset = bo-mem.mm_node-start  PAGE_SHIFT;
+   /* this should not happen */
+   if ((offset + size)  rdev-mc.visible_vram_size)
+   return -EINVAL;
+   }
+   }
+   return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_object.h 
b/drivers/gpu/drm/radeon/radeon_object.h
index 7ab43de..353998d 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -168,6 +168,6 @@ extern int radeon_bo_check_tiling(struct radeon_bo *bo, 
bool has_moved,
bool force_drop);
 extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
struct ttm_mem_reg *mem);
-extern void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
+extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index ba4724c..d2700b8 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -437,10 +437,53 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 memcpy:
r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, 
new_mem);
}
-
return r;
 }
 
+static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct radeon_device *rdev = radeon_get_rdev(bdev);
+
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   mem-bus.is_iomem = false;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* system memory */
+   return 0;
+   case TTM_PL_TT:
+#if __OS_HAS_AGP
+   if (rdev-flags  RADEON_IS_AGP) {
+   /* RADEON_IS_AGP is set only if AGP is active */
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = rdev-mc.agp_base;
+   mem-bus.is_iomem = true;
+   }
+#endif
+   return 0;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   /* check if it's visible */
+   if ((mem-bus.offset + mem-bus.size)  
rdev-mc.visible_vram_size)
+   return -EINVAL;
+   mem-bus.base = rdev-mc.aper_base;
+   mem-bus.is_iomem = true;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem

[PATCH 10/14] drm/nouveau/kms: don't initialize TTM io memory manager field

2010-03-24 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |   11 ---
 1 files changed, 0 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index dafb79c..9812d1c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -391,13 +391,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-
-   man-io_addr = NULL;
-   man-io_offset = drm_get_resource_start(dev, 1);
-   man-io_size = drm_get_resource_len(dev, 1);
-   if (man-io_size  nouveau_mem_fb_amount(dev))
-   man-io_size = nouveau_mem_fb_amount(dev);
-
man-gpu_offset = dev_priv-vm_vram_base;
break;
case TTM_PL_TT:
@@ -419,10 +412,6 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
 dev_priv-gart_info.type);
return -EINVAL;
}
-
-   man-io_offset  = dev_priv-gart_info.aper_base;
-   man-io_size= dev_priv-gart_info.aper_size;
-   man-io_addr   = NULL;
man-gpu_offset = dev_priv-vm_gart_base;
break;
default:
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 09/14] drm/radeon/kms: don't initialize TTM io memory manager field

2010-03-24 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_ttm.c |   13 +
 1 files changed, 1 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index d2700b8..0220154 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -161,22 +161,14 @@ static int radeon_init_mem_type(struct ttm_bo_device 
*bdev, uint32_t type,
  (unsigned)type);
return -EINVAL;
}
-   man-io_offset = rdev-mc.agp_base;
-   man-io_size = rdev-mc.gtt_size;
-   man-io_addr = NULL;
if (!rdev-ddev-agp-cant_use_aperture)
man-flags = TTM_MEMTYPE_FLAG_NEEDS_IOREMAP |
 TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED |
 TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-   } else
-#endif
-   {
-   man-io_offset = 0;
-   man-io_size = 0;
-   man-io_addr = NULL;
}
+#endif
break;
case TTM_PL_VRAM:
/* On-card video ram */
@@ -186,9 +178,6 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
 TTM_MEMTYPE_FLAG_MAPPABLE;
man-available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
man-default_caching = TTM_PL_FLAG_WC;
-   man-io_addr = NULL;
-   man-io_offset = rdev-mc.aper_base;
-   man-io_size = rdev-mc.aper_size;
break;
default:
DRM_ERROR(Unsupported memory type %u\n, (unsigned)type);
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 11/14] drm/vmwgfx: don't initialize TTM io memory manager field

2010-03-24 Thread Jerome Glisse
This isn't needed anymore with the new TTM fault callback

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |6 --
 1 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 7e28448..a0fb612 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -137,9 +137,6 @@ int vmw_invalidate_caches(struct ttm_bo_device *bdev, 
uint32_t flags)
 int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
  struct ttm_mem_type_manager *man)
 {
-   struct vmw_private *dev_priv =
-   container_of(bdev, struct vmw_private, bdev);
-
switch (type) {
case TTM_PL_SYSTEM:
/* System memory */
@@ -151,11 +148,8 @@ int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t 
type,
case TTM_PL_VRAM:
/* On-card video ram */
man-gpu_offset = 0;
-   man-io_offset = dev_priv-vram_start;
-   man-io_size = dev_priv-vram_size;
man-flags = TTM_MEMTYPE_FLAG_FIXED |
TTM_MEMTYPE_FLAG_NEEDS_IOREMAP | TTM_MEMTYPE_FLAG_MAPPABLE;
-   man-io_addr = NULL;
man-available_caching = TTM_PL_MASK_CACHING;
man-default_caching = TTM_PL_FLAG_WC;
break;
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 13/14] drm/radeon/kms: enable use of unmappable VRAM V2

2010-03-24 Thread Jerome Glisse
This patch enable the use of unmappable VRAM thanks to
previous TTM infrastructure change.

V2 update after io_mem_reserve/io_mem_free callback balancing

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/evergreen.c |5 -
 drivers/gpu/drm/radeon/r100.c  |5 -
 drivers/gpu/drm/radeon/r600.c  |5 -
 drivers/gpu/drm/radeon/rv770.c |5 -
 4 files changed, 0 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 7672f11..48e0dfb 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -475,11 +475,6 @@ int evergreen_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
radeon_update_bandwidth_info(rdev);
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7a1180d..fc95907 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2026,11 +2026,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
else
rdev-mc.mc_vram_size = rdev-mc.real_vram_size;
}
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
 }
 
 void r100_vga_set_state(struct radeon_device *rdev, bool state)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index a948947..4206c8d 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -713,11 +713,6 @@ int r600_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
 
if (rdev-flags  RADEON_IS_IGP)
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 2b8a4e1..599121b 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -905,11 +905,6 @@ int rv770_mc_init(struct radeon_device *rdev)
rdev-mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev-mc.visible_vram_size = rdev-mc.aper_size;
-   /* FIXME remove this once we support unmappable VRAM */
-   if (rdev-mc.mc_vram_size  rdev-mc.aper_size) {
-   rdev-mc.mc_vram_size = rdev-mc.aper_size;
-   rdev-mc.real_vram_size = rdev-mc.aper_size;
-   }
r600_vram_gtt_location(rdev, rdev-mc);
radeon_update_bandwidth_info(rdev);
 
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 08/14] drm/vmwgfx: add support for new TTM fault callback V3

2010-03-24 Thread Jerome Glisse
This adds support for the new fault callback; it doesn't change anything
from the driver's point of view.

Improvement: store the aperture base in a variable so that we don't
call a function to get it on each fault.

Patch hasn't been tested.

V2 don't dereference bo->mem.mm_node as it's non-NULL only for
   VRAM or GTT
V3 update after io_mem_reserve/io_mem_free callback balancing

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |   40 +++-
 1 files changed, 39 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 825ebe3..7e28448 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -193,6 +193,41 @@ static void vmw_swap_notify(struct ttm_buffer_object *bo)
vmw_dmabuf_gmr_unbind(bo);
 }
 
+static int vmw_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct 
ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct vmw_private *dev_priv = container_of(bdev, struct vmw_private, 
bdev);
+
+   mem-bus.is_iomem = false;
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* System memory */
+   return 0;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = dev_priv-vram_start;
+   mem-bus.is_iomem = true;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void vmw_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg 
*mem)
+{
+}
+
+static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+   return 0;
+}
+
 /**
  * FIXME: We're using the old vmware polling method to sync.
  * Do this with fences instead.
@@ -248,5 +283,8 @@ struct ttm_bo_driver vmw_bo_driver = {
.sync_obj_unref = vmw_sync_obj_unref,
.sync_obj_ref = vmw_sync_obj_ref,
.move_notify = vmw_move_notify,
-   .swap_notify = vmw_swap_notify
+   .swap_notify = vmw_swap_notify,
+   .fault_reserve_notify = vmw_ttm_fault_reserve_notify,
+   .io_mem_reserve = vmw_ttm_io_mem_reserve,
+   .io_mem_free = vmw_ttm_io_mem_free,
 };
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 03/14] drm/nouveau: update to TTM no_wait splitted argument

2010-03-24 Thread Jerome Glisse
This patch updates nouveau to the new split no_wait argument
TTM functionality.

Compile tested only (but thing should run as there is no
operating change from driver point of view)

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  |   45 ++--
 drivers/gpu/drm/nouveau/nouveau_gem.c |2 +-
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0266124..5a167de 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -219,7 +219,7 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype)
for (i = 0; i  nvbo-placement.num_placement; i++)
nvbo-placements[i] |= TTM_PL_FLAG_NO_EVICT;
 
-   ret = ttm_bo_validate(bo, nvbo-placement, false, false);
+   ret = ttm_bo_validate(bo, nvbo-placement, false, false, false);
if (ret == 0) {
switch (bo-mem.mem_type) {
case TTM_PL_VRAM:
@@ -256,7 +256,7 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo)
for (i = 0; i  nvbo-placement.num_placement; i++)
nvbo-placements[i] = ~TTM_PL_FLAG_NO_EVICT;
 
-   ret = ttm_bo_validate(bo, nvbo-placement, false, false);
+   ret = ttm_bo_validate(bo, nvbo-placement, false, false, false);
if (ret == 0) {
switch (bo-mem.mem_type) {
case TTM_PL_VRAM:
@@ -456,7 +456,8 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct 
ttm_placement *pl)
 
 static int
 nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
- struct nouveau_bo *nvbo, bool evict, bool no_wait,
+ struct nouveau_bo *nvbo, bool evict,
+ bool no_wait_reserve, bool no_wait_gpu,
  struct ttm_mem_reg *new_mem)
 {
struct nouveau_fence *fence = NULL;
@@ -467,7 +468,7 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
return ret;
 
ret = ttm_bo_move_accel_cleanup(nvbo-bo, fence, NULL,
-   evict, no_wait, new_mem);
+   evict, no_wait_reserve, no_wait_gpu, 
new_mem);
if (nvbo-channel  nvbo-channel != chan)
ret = nouveau_fence_wait(fence, NULL, false, false);
nouveau_fence_unref((void *)fence);
@@ -491,7 +492,8 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct 
nouveau_channel *chan,
 
 static int
 nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-int no_wait, struct ttm_mem_reg *new_mem)
+bool no_wait_reserve, bool no_wait_gpu,
+struct ttm_mem_reg *new_mem)
 {
struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo-bdev);
@@ -569,12 +571,13 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int 
evict, bool intr,
dst_offset += (PAGE_SIZE * line_count);
}
 
-   return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, no_wait, 
new_mem);
+   return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, 
no_wait_reserve, no_wait_gpu, new_mem);
 }
 
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
 {
u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
struct ttm_placement placement;
@@ -587,7 +590,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
 
tmp_mem = *new_mem;
tmp_mem.mm_node = NULL;
-   ret = ttm_bo_mem_space(bo, placement, tmp_mem, intr, no_wait);
+   ret = ttm_bo_mem_space(bo, placement, tmp_mem, intr, no_wait_reserve, 
no_wait_gpu);
if (ret)
return ret;
 
@@ -595,11 +598,11 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
if (ret)
goto out;
 
-   ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, tmp_mem);
+   ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, 
no_wait_gpu, tmp_mem);
if (ret)
goto out;
 
-   ret = ttm_bo_move_ttm(bo, evict, no_wait, new_mem);
+   ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
 out:
if (tmp_mem.mm_node) {
spin_lock(bo-bdev-glob-lru_lock);
@@ -612,7 +615,8 @@ out:
 
 static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+ bool no_wait_reserve, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem)
 {
u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING

[PATCH 07/14] drm/nouveau/kms: add support for new TTM fault callback V3

2010-03-24 Thread Jerome Glisse
This adds support for the new fault callback; it doesn't change anything
from the driver's point of view, though it should allow nouveau to add
support for unmappable VRAM.

Improvement: store the aperture base in a variable so that we don't
call a function to get it on each fault.

Patch hasn't been tested on any hw.

V2 don't dereference bo->mem.mm_node as it's non-NULL only for
   VRAM or GTT
V3 update after io_mem_reserve/io_mem_free callback balancing

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |   51 ++
 1 files changed, 51 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 5a167de..dafb79c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -761,6 +761,54 @@ nouveau_bo_verify_access(struct ttm_buffer_object *bo, 
struct file *filp)
return 0;
 }
 
+static int
+nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
+   struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
+   struct drm_device *dev = dev_priv-dev;
+
+   mem-bus.offset = 0;
+   mem-bus.size = mem-num_pages  PAGE_SHIFT;
+   mem-bus.base = 0;
+   mem-bus.is_iomem = false;
+   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
+   return -EINVAL;
+   switch (mem-mem_type) {
+   case TTM_PL_SYSTEM:
+   /* System memory */
+   return 0;
+   case TTM_PL_TT:
+#if __OS_HAS_AGP
+   if (dev_priv-gart_info.type == NOUVEAU_GART_AGP) {
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = dev_priv-gart_info.aper_base;
+   mem-bus.is_iomem = true;
+   }
+#endif
+   return 0;
+   case TTM_PL_VRAM:
+   mem-bus.offset = mem-mm_node-start  PAGE_SHIFT;
+   mem-bus.base = drm_get_resource_start(dev, 1);
+   mem-bus.is_iomem = true;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static void
+nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+}
+
+static int
+nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+   return 0;
+}
+
 struct ttm_bo_driver nouveau_bo_driver = {
.create_ttm_backend_entry = nouveau_bo_create_ttm_backend_entry,
.invalidate_caches = nouveau_bo_invalidate_caches,
@@ -773,5 +821,8 @@ struct ttm_bo_driver nouveau_bo_driver = {
.sync_obj_flush = nouveau_fence_flush,
.sync_obj_unref = nouveau_fence_unref,
.sync_obj_ref = nouveau_fence_ref,
+   .fault_reserve_notify = nouveau_ttm_fault_reserve_notify,
+   .io_mem_reserve = nouveau_ttm_io_mem_reserve,
+   .io_mem_free = nouveau_ttm_io_mem_free,
 };
 
-- 
1.6.6.1


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 12/14] drm/ttm: remove io_ field from TTM V3

2010-03-24 Thread Jerome Glisse
All TTM drivers have been converted to the new io_mem_reserve/free
interface, which allows each driver to choose and return the proper io
base and offset to core TTM for ioremapping if necessary. This
patch removes what is now dead code.

V2 adapt to match with change in first patch of the patchset
V3 update after io_mem_reserve/io_mem_free callback balancing

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |   22 --
 drivers/gpu/drm/ttm/ttm_bo_util.c |   26 +++---
 include/drm/ttm/ttm_bo_driver.h   |   10 --
 3 files changed, 7 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3131416..e3fa47c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -79,8 +79,6 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, 
int mem_type)
printk(KERN_ERR TTM_PFX use_type: %d\n, man-use_type);
printk(KERN_ERR TTM_PFX flags: 0x%08X\n, man-flags);
printk(KERN_ERR TTM_PFX gpu_offset: 0x%08lX\n, man-gpu_offset);
-   printk(KERN_ERR TTM_PFX io_offset: 0x%08lX\n, man-io_offset);
-   printk(KERN_ERR TTM_PFX io_size: %ld\n, man-io_size);
printk(KERN_ERR TTM_PFX size: %llu\n, man-size);
printk(KERN_ERR TTM_PFX available_caching: 0x%08X\n,
man-available_caching);
@@ -1563,26 +1561,6 @@ bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, 
struct ttm_mem_reg *mem)
return true;
 }
 
-int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
- struct ttm_mem_reg *mem,
- unsigned long *bus_base,
- unsigned long *bus_offset, unsigned long *bus_size)
-{
-   struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
-
-   *bus_size = 0;
-   if (!(man-flags  TTM_MEMTYPE_FLAG_MAPPABLE))
-   return -EINVAL;
-
-   if (ttm_mem_reg_is_pci(bdev, mem)) {
-   *bus_offset = mem-mm_node-start  PAGE_SHIFT;
-   *bus_size = mem-num_pages  PAGE_SHIFT;
-   *bus_base = man-io_offset + (uintptr_t)man-io_addr;
-   }
-
-   return 0;
-}
-
 void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 {
struct ttm_bo_device *bdev = bo-bdev;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index d5b1de6..14192d9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -83,30 +83,18 @@ EXPORT_SYMBOL(ttm_bo_move_ttm);
 
 int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
-   int ret;
+   int ret = 0;
 
-   if (bdev-driver-io_mem_reserve) {
-   if (!atomic_xchg(mem-bus.use_count, 1)) {
-   ret = bdev-driver-io_mem_reserve(bdev, mem);
-   if (unlikely(ret != 0)) {
-   return ret;
-   }
-   }
-   } else {
-   ret = ttm_bo_pci_offset(bdev, mem, mem-bus.base, 
mem-bus.offset, mem-bus.size);
-   if (unlikely(ret != 0))
-   return ret;
-   mem-bus.is_iomem = (mem-bus.size  0) ? 1 : 0;
+   if (!atomic_xchg(mem-bus.use_count, 1)) {
+   ret = bdev-driver-io_mem_reserve(bdev, mem);
}
-   return 0;
+   return ret;
 }
 
 void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
-   if (bdev-driver-io_mem_reserve) {
-   atomic_set(mem-bus.use_count, 0);
-   bdev-driver-io_mem_free(bdev, mem);
-   }
+   atomic_set(mem-bus.use_count, 0);
+   bdev-driver-io_mem_free(bdev, mem);
 }
 
 int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
@@ -409,7 +397,7 @@ static int ttm_bo_ioremap(struct ttm_buffer_object *bo,
 
if (!(man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP)) {
map-bo_kmap_type = ttm_bo_map_premapped;
-   map-virtual = (void *)(((u8 *) man-io_addr) + bus_offset);
+   map-virtual = (void *)(bus_base + bus_offset);
} else {
map-bo_kmap_type = ttm_bo_map_iomap;
if (mem-placement  TTM_PL_FLAG_WC)
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index da39865..35e2b9f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -189,13 +189,6 @@ struct ttm_tt {
  * managed by this memory type.
  * @gpu_offset: If used, the GPU offset of the first managed page of
  * fixed memory or the first managed location in an aperture.
- * @io_offset: The io_offset of the first managed page of IO memory or
- * the first managed location in an aperture. For TTM_MEMTYPE_FLAG_CMA
- * memory, this should be set to NULL.
- * @io_size: The size of a managed IO region (fixed memory or aperture).
- * @io_addr: Virtual kernel address if the io region is pre-mapped

[PATCH 01/14] drm/ttm: split no_wait argument in 2 GPU or reserve wait

2010-03-24 Thread Jerome Glisse
There are cases where we want to be able to wait only for the
GPU while not waiting for other buffers to be unreserved. This
patch splits the no_wait argument all the way down the whole
ttm path so that the upper level can decide what to wait on or
not.

This patch breaks the API to other modules; updates to the other
drivers follow in separate patches.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |   57 
 drivers/gpu/drm/ttm/ttm_bo_util.c |9 --
 include/drm/ttm/ttm_bo_api.h  |6 ++-
 include/drm/ttm/ttm_bo_driver.h   |   29 +++---
 4 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9db02bb..6f51b30 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -357,7 +357,8 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
 
 static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
  struct ttm_mem_reg *mem,
- bool evict, bool interruptible, bool no_wait)
+ bool evict, bool interruptible,
+ bool no_wait_reserve, bool no_wait_gpu)
 {
struct ttm_bo_device *bdev = bo-bdev;
bool old_is_pci = ttm_mem_reg_is_pci(bdev, bo-mem);
@@ -402,12 +403,12 @@ static int ttm_bo_handle_move_mem(struct 
ttm_buffer_object *bo,
 
if (!(old_man-flags  TTM_MEMTYPE_FLAG_FIXED) 
!(new_man-flags  TTM_MEMTYPE_FLAG_FIXED))
-   ret = ttm_bo_move_ttm(bo, evict, no_wait, mem);
+   ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, 
mem);
else if (bdev-driver-move)
ret = bdev-driver-move(bo, evict, interruptible,
-no_wait, mem);
+no_wait_reserve, no_wait_gpu, mem);
else
-   ret = ttm_bo_move_memcpy(bo, evict, no_wait, mem);
+   ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, 
no_wait_gpu, mem);
 
if (ret)
goto out_err;
@@ -606,7 +607,7 @@ void ttm_bo_unref(struct ttm_buffer_object **p_bo)
 EXPORT_SYMBOL(ttm_bo_unref);
 
 static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
-   bool no_wait)
+   bool no_wait_reserve, bool no_wait_gpu)
 {
struct ttm_bo_device *bdev = bo-bdev;
struct ttm_bo_global *glob = bo-glob;
@@ -615,7 +616,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
int ret = 0;
 
spin_lock(bo-lock);
-   ret = ttm_bo_wait(bo, false, interruptible, no_wait);
+   ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
spin_unlock(bo-lock);
 
if (unlikely(ret != 0)) {
@@ -638,7 +639,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
placement.num_busy_placement = 0;
bdev-driver-evict_flags(bo, placement);
ret = ttm_bo_mem_space(bo, placement, evict_mem, interruptible,
-   no_wait);
+   no_wait_reserve, no_wait_gpu);
if (ret) {
if (ret != -ERESTARTSYS) {
printk(KERN_ERR TTM_PFX
@@ -650,7 +651,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
}
 
ret = ttm_bo_handle_move_mem(bo, evict_mem, true, interruptible,
-no_wait);
+no_wait_reserve, no_wait_gpu);
if (ret) {
if (ret != -ERESTARTSYS)
printk(KERN_ERR TTM_PFX Buffer eviction failed\n);
@@ -670,7 +671,8 @@ out:
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
uint32_t mem_type,
-   bool interruptible, bool no_wait)
+   bool interruptible, bool no_wait_reserve,
+   bool no_wait_gpu)
 {
struct ttm_bo_global *glob = bdev-glob;
struct ttm_mem_type_manager *man = bdev-man[mem_type];
@@ -687,11 +689,11 @@ retry:
bo = list_first_entry(man-lru, struct ttm_buffer_object, lru);
kref_get(bo-list_kref);
 
-   ret = ttm_bo_reserve_locked(bo, false, true, false, 0);
+   ret = ttm_bo_reserve_locked(bo, false, no_wait_reserve, false, 0);
 
if (unlikely(ret == -EBUSY)) {
spin_unlock(glob-lru_lock);
-   if (likely(!no_wait))
+   if (likely(!no_wait_gpu))
ret = ttm_bo_wait_unreserved(bo, interruptible);
 
kref_put(bo-list_kref, ttm_bo_release_list);
@@ -713,7 +715,7 @@ retry:
while (put_count--)
kref_put(bo-list_kref, ttm_bo_ref_bug);
 
-   ret = ttm_bo_evict(bo

[PATCH] drm/ttm: ttm_fault callback to allow driver to handle bo placement V3

2010-03-24 Thread Jerome Glisse
On fault the driver is given the opportunity to perform any operation
it sees fit in order to place the buffer into a CPU-visible area of
memory. This patch doesn't break TTM users; nouveau, vmwgfx and radeon
should keep working properly. A future patch will take advantage of this
infrastructure and remove the old path from TTM once drivers are
converted.

V2 return VM_FAULT_NOPAGE if callback return -EBUSY or -ERESTARTSYS
V3 balance io_mem_reserve and io_mem_free call, fault_reserve_notify
   is responsible to perform any necessary task for mapping to succeed

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/ttm/ttm_bo.c  |7 ++-
 drivers/gpu/drm/ttm/ttm_bo_util.c |   95 ++---
 drivers/gpu/drm/ttm/ttm_bo_vm.c   |   46 --
 include/drm/ttm/ttm_bo_api.h  |   21 
 include/drm/ttm/ttm_bo_driver.h   |   16 ++-
 5 files changed, 108 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6f51b30..3131416 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -632,6 +632,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool 
interruptible,
 
evict_mem = bo-mem;
evict_mem.mm_node = NULL;
+   atomic_set(evict_mem.bus.use_count, 0);
 
placement.fpfn = 0;
placement.lpfn = 0;
@@ -1005,6 +1006,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
mem.num_pages = bo-num_pages;
mem.size = mem.num_pages  PAGE_SHIFT;
mem.page_alignment = bo-mem.page_alignment;
+   atomic_set(mem.bus.use_count, 0);
/*
 * Determine where to move the buffer.
 */
@@ -1147,6 +1149,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
kref_init(bo-list_kref);
atomic_set(bo-cpu_writers, 0);
atomic_set(bo-reserved, 1);
+   atomic_set(bo-mem.bus.use_count, 0);
init_waitqueue_head(bo-event_queue);
INIT_LIST_HEAD(bo-lru);
INIT_LIST_HEAD(bo-ddestroy);
@@ -1574,7 +1577,7 @@ int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
if (ttm_mem_reg_is_pci(bdev, mem)) {
*bus_offset = mem-mm_node-start  PAGE_SHIFT;
*bus_size = mem-num_pages  PAGE_SHIFT;
-   *bus_base = man-io_offset;
+   *bus_base = man-io_offset + (uintptr_t)man-io_addr;
}
 
return 0;
@@ -1588,8 +1591,8 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 
if (!bdev-dev_mapping)
return;
-
unmap_mapping_range(bdev-dev_mapping, offset, holelen, 1);
+   ttm_mem_io_free(bdev, bo-mem);
 }
 EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 865b2a8..f8ed48e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -81,30 +81,56 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_move_ttm);
 
+int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   int ret;
+
+   if (bdev-driver-io_mem_reserve) {
+   if (!atomic_xchg(mem-bus.use_count, 1)) {
+   ret = bdev-driver-io_mem_reserve(bdev, mem);
+   if (unlikely(ret != 0))
+   return ret;
+   }
+   } else {
+   ret = ttm_bo_pci_offset(bdev, mem, mem-bus.base, 
mem-bus.offset, mem-bus.size);
+   if (unlikely(ret != 0))
+   return ret;
+   mem-bus.is_iomem = (mem-bus.size  0) ? 1 : 0;
+   }
+   return 0;
+}
+
+void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+   if (bdev-driver-io_mem_reserve) {
+   atomic_set(mem-bus.use_count, 0);
+   bdev-driver-io_mem_free(bdev, mem);
+   }
+}
+
 int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
void **virtual)
 {
struct ttm_mem_type_manager *man = bdev-man[mem-mem_type];
-   unsigned long bus_offset;
-   unsigned long bus_size;
-   unsigned long bus_base;
int ret;
void *addr;
 
*virtual = NULL;
-   ret = ttm_bo_pci_offset(bdev, mem, bus_base, bus_offset, bus_size);
-   if (ret || bus_size == 0)
+   ret = ttm_mem_io_reserve(bdev, mem);
+   if (ret)
return ret;
 
-   if (!(man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP))
-   addr = (void *)(((u8 *) man-io_addr) + bus_offset);
-   else {
+   if (!(man-flags  TTM_MEMTYPE_FLAG_NEEDS_IOREMAP)) {
+   addr = (void *)(mem-bus.base + mem-bus.offset);
+   } else {
if (mem-placement  TTM_PL_FLAG_WC)
-   addr = ioremap_wc(bus_base + bus_offset, bus_size);
+   addr = ioremap_wc(mem-bus.base + mem-bus.offset, 
mem-bus.size);
else

Re: [PATCH] drm/ttm: ttm_fault callback to allow driver to handle bo placement V3

2010-03-24 Thread Jerome Glisse
On Wed, Mar 24, 2010 at 07:27:57PM +0100, Thomas Hellstrom wrote:
 Jerome Glisse wrote:
  On fault the driver is given the opportunity to perform any operation
  it sees fit in order to place the buffer into a CPU visible area of
  memory. This patch doesn't break TTM users, nouveau, vmwgfx and radeon
  should keep working properly. Future patch will take advantage of this
  infrastructure and remove the old path from TTM once driver are
  converted.
 
  V2 return VM_FAULT_NOPAGE if callback return -EBUSY or -ERESTARTSYS
  V3 balance io_mem_reserve and io_mem_free call, fault_reserve_notify
 is responsible to perform any necessary task for mapping to succeed
 
  Signed-off-by: Jerome Glisse jgli...@redhat.com
  ---
   drivers/gpu/drm/ttm/ttm_bo.c  |7 ++-
   drivers/gpu/drm/ttm/ttm_bo_util.c |   95 
  ++---
   drivers/gpu/drm/ttm/ttm_bo_vm.c   |   46 --
   include/drm/ttm/ttm_bo_api.h  |   21 
   include/drm/ttm/ttm_bo_driver.h   |   16 ++-
   5 files changed, 108 insertions(+), 77 deletions(-)
 
  @@ -1588,8 +1591,8 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object 
  *bo)
   
  if (!bdev-dev_mapping)
  return;
  -

 
 Still a lot of these. Please remove.
   
  +int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
  +{
  +   int ret;
  +
  +   if (bdev-driver-io_mem_reserve) {
  +   if (!atomic_xchg(mem-bus.use_count, 1)) {
  +   ret = bdev-driver-io_mem_reserve(bdev, mem);
  +   if (unlikely(ret != 0))
  +   return ret;
  +   }
  +   } else {
  +   ret = ttm_bo_pci_offset(bdev, mem, mem-bus.base, 
  mem-bus.offset, mem-bus.size);
  +   if (unlikely(ret != 0))
  +   return ret;
  +   mem-bus.is_iomem = (mem-bus.size  0) ? 1 : 0;
  +   }
  +   return 0;
  +}
  +
  +void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
  +{
  +   if (bdev-driver-io_mem_reserve) {
  +   atomic_set(mem-bus.use_count, 0);

 
 Shouldn't there be a test for zero before calling this?
  +   bdev-driver-io_mem_free(bdev, mem);
  +   }
  +}
  +

 
 Hmm. I don't get the logic of the above refcounting. First, the kernel 
 can preempt between refcounting and driver calls:
 
 Thread a sets use_count to 1 and preempts.
 Thread b sees use_count 1 and calls ttm_bo_pci_offset(), but we haven't 
 yet done an io_mem_reserve??
 
 Otoh, from the last section it seems we always will hold bo::reserved 
 around these calls, so instead we could make use_count a non-atomic 
 variable.
 

use_count could become bool io_reserved it's atomic for patch historical
reason, will respawn a patch without atomic and blank line removal.

 Then, for the rest please consider the following use cases:
 
 1) We want to temporarily map a bo within the kernel. We do:
 reserve_bo().
 make_mappable().
 kmap().
 kunmap().
 free_mappable_resources().  // This is just a hint. When the bo is 
 unreserved, the manager is free to evict the mappable region.
 unreserve_bo().
 
 2) We want to permanently map a bo within the kernel (kernel map for fbdev).
 We do (bo is not reserved).
 pin_mappable().
 kmap().
 access
 kunmap().
 unpin_mappable().
 
 3) Fault.
 reserve_bo().
 make_mappable().
 set_up user_space_map().
 unreserve_bo().
 
 /// Here the manager is free to evict the mappable range by reserving 
 and then calling ttm_bo_unmap_virtual().
 
 4) Unmap Virtual. (Called reserved).
 unmap_user_space_mappings().
 free_mappable_resources().
 
 It looks to me like you've implemented make_mappable() = 
 ttm_mem_io_reserve() and free_mappable_resources() = ttm_mem_io_free(), 
 and from the above use cases we can see that they always will be called 
 when the bo is reserved. Hence no need for an atomic variable and we can 
 ignore the race scenarios above.
 
 but what about pin_mappable() and unpin_mappable()? A general mapping 
 manager must be able to perform these operations. Perhaps

The idea is that buffers that will be mapped the whole time will also be
set with no-evict, so unmap virtual is never called on them (at least
that is my feeling from the code). So io_mem_reserve also works for
pinned buffers, and I don't separate the pinned/not-pinned case from
the driver io manager (if the driver has any).

 
 
 Finally, consider a very simple mapping manager that uses the two ttm 
 regions VRAM and VRAM_MAPPABLE. We'd implement the driver operations as 
 follows, assuming we add io_mem_pin and io_mem_unpin:
 
 io_mem_reserve:
 ttm_bo_validate(VRAM_MAPPABLE); (Evicting as needed).
 
 io_mem_unreserve:
 noop().
 
 io_mem_pin:
 ttm_bo_reserve()
 if (pin_count++ == 0)
ttm_bo_validate(VRAM_MAPPABLE | NO_EVICT);
 ttm_bo_unreserve()
 
 io_mem_unpin:
 ttm_bo_reserve()
 if (--pin_count == 0)
ttm_bo_validate(VRAM_MAPPABLE);
 ttm_bo_unreserve()
 
 This simple mapping manager would need a struct ttm_buffer_object

Re: [PATCH] drm/ttm: ttm_fault callback to allow driver to handle bo placement V3

2010-03-24 Thread Jerome Glisse
On Wed, Mar 24, 2010 at 09:08:08PM +0100, Thomas Hellstrom wrote:
 Jerome Glisse wrote:
 On Wed, Mar 24, 2010 at 07:27:57PM +0100, Thomas Hellstrom wrote:
 Jerome Glisse wrote:
 On fault the driver is given the opportunity to perform any operation
 it sees fit in order to place the buffer into a CPU visible area of
 memory. This patch doesn't break TTM users, nouveau, vmwgfx and radeon
 should keep working properly. Future patch will take advantage of this
 infrastructure and remove the old path from TTM once driver are
 converted.
 
 V2 return VM_FAULT_NOPAGE if callback return -EBUSY or -ERESTARTSYS
 V3 balance io_mem_reserve and io_mem_free call, fault_reserve_notify
is responsible to perform any necessary task for mapping to succeed
 
 Signed-off-by: Jerome Glisse jgli...@redhat.com
 ---
  drivers/gpu/drm/ttm/ttm_bo.c  |7 ++-
  drivers/gpu/drm/ttm/ttm_bo_util.c |   95 
  ++---
  drivers/gpu/drm/ttm/ttm_bo_vm.c   |   46 --
  include/drm/ttm/ttm_bo_api.h  |   21 
  include/drm/ttm/ttm_bo_driver.h   |   16 ++-
  5 files changed, 108 insertions(+), 77 deletions(-)
 
 @@ -1588,8 +1591,8 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object 
 *bo)
if (!bdev-dev_mapping)
return;
 -
 Still a lot of these. Please remove.
 +int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg 
 *mem)
 +{
 +  int ret;
 +
 +  if (bdev-driver-io_mem_reserve) {
 +  if (!atomic_xchg(mem-bus.use_count, 1)) {
 +  ret = bdev-driver-io_mem_reserve(bdev, mem);
 +  if (unlikely(ret != 0))
 +  return ret;
 +  }
 +  } else {
 +  ret = ttm_bo_pci_offset(bdev, mem, mem-bus.base, 
 mem-bus.offset, mem-bus.size);
 +  if (unlikely(ret != 0))
 +  return ret;
 +  mem-bus.is_iomem = (mem-bus.size  0) ? 1 : 0;
 +  }
 +  return 0;
 +}
 +
 +void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 +{
 +  if (bdev-driver-io_mem_reserve) {
 +  atomic_set(mem-bus.use_count, 0);
 Shouldn't there be a test for zero before calling this?
 +  bdev-driver-io_mem_free(bdev, mem);
 +  }
 +}
 +
 Hmm. I don't get the logic of the above refcounting. First, the
 kernel can preempt between refcounting and driver calls:
 
 Thread a sets use_count to 1 and preempts.
 Thread b sees use_count 1 and calls ttm_bo_pci_offset(), but we
 haven't yet done an io_mem_reserve??
 
 Otoh, from the last section it seems we always will hold
 bo::reserved around these calls, so instead we could make
 use_count a non-atomic variable.
 
 
 use_count could become bool io_reserved it's atomic for patch historical
 reason, will respawn a patch without atomic and blank line removal.
 
 Great. Please also consider the test for 0 on unreserve.
 
 Then, for the rest please consider the following use cases:
 
 1) We want to temporarily map a bo within the kernel. We do:
 reserve_bo().
 make_mappable().
 kmap().
 kunmap().
 free_mappable_resources().  // This is just a hint. When the bo
 is unreserved, the manager is free to evict the mappable region.
 unreserve_bo().
 
 2) We want to permanently map a bo within the kernel (kernel map for fbdev).
 We do (bo is not reserved).
 pin_mappable().
 kmap().
 access
 kunmap().
 unpin_mappable().
 
 3) Fault.
 reserve_bo().
 make_mappable().
 set_up user_space_map().
 unreserve_bo().
 
 /// Here the manager is free to evict the mappable range by
 reserving and then calling ttm_bo_unmap_virtual().
 
 4) Unmap Virtual. (Called reserved).
 unmap_user_space_mappings().
 free_mappable_resources().
 
 It looks to me like you've implemented make_mappable() =
 ttm_mem_io_reserve() and free_mappable_resources() =
 ttm_mem_io_free(), and from the above use cases we can see that
 they always will be called when the bo is reserved. Hence no
 need for an atomic variable and we can ignore the race scenarios
 above.
 
 but what about pin_mappable() and unpin_mappable()? A general
 mapping manager must be able to perform these operations.
 Perhaps
 
 Idea is that buffer that will be mapped the whole time will also be
 set with no evict so unmap virtual is never call on them (at least
 that is my feeling from the code). So iomeme_reserve works also for
 pinned buffer and i don't separate the pined/not pinned case from
 the driver io manager (if driver has any).
 
 Yes, That's the case for simple io managers, where the mappable
 range is simply a (sub)TTM region. Then TTM NO_EVICT is equivalent
 to io manager NO_EVICT. However, if the IO manager is not a TTM
 region manager but something the driver implements with its own LRU
 list, the IO manager must be informed about this. Admitted, we have
 no driver like this yet, and the common code won't be using that
 API, so I'm OK with leaving it for now. I might add a comment about
 this close to the io manager hooks later on.
 
 Finally, consider a very simple mapping manager

Re: Unmappable VRAM patchset V4

2010-03-17 Thread Jerome Glisse
On Mon, Mar 01, 2010 at 01:03:38PM +0100, Thomas Hellstrom wrote:
 Dave Airlie wrote:
  On Fri, Feb 26, 2010 at 3:01 AM, Jerome Glisse jgli...@redhat.com wrote:

  Updated patchset, to apply cleanly on top of TTM split no_wait argument.
  Compile tested for nouveau+vmwgfx, test in progress for radeon.
 
  So with the new change radeon won't wait for bo reserving other bo
  in fault path but will wait the GPU (hoping it doesn't lockup ;))
  This should address concern about the wait/locking issue.
  
 
  Thomas any time for this yet? I'd like to pull this in obviously, but
  it would be nice to know if Jerome has addressed all concerns.
 
  Dave.

 Hi Dave!
 My schedule is currently a bit tight. I think the immediate deadlock 
 concerns are met, but I'd to take a deeper look at some things that look 
 a bit suspicious, but I think the overall approach is ok.
 I'll hopefully be able to do a review on wednesday.
 
 /Thomas
 

Thomas any chance to review this ? NVidia patch already need update
and i would like to avoid having this bitrot too much.

Cheers,
Jerome

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH] drm/radeon/kms: avoid possible oops (call gart_fini before gart_disable)

2010-03-17 Thread Jerome Glisse
radeon_gart_fini might call the GART unbind callback function, which
might try to access the GART table; but if gart_disable is called first,
the GART table will be unmapped, so any access to it will oops.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/r100.c  |2 +-
 drivers/gpu/drm/radeon/r300.c  |2 +-
 drivers/gpu/drm/radeon/r600.c  |2 +-
 drivers/gpu/drm/radeon/rs400.c |2 +-
 drivers/gpu/drm/radeon/rs600.c |2 +-
 drivers/gpu/drm/radeon/rv770.c |2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 9fef5cc..7a1180d 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -236,9 +236,9 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int 
i, uint64_t addr)
 
 void r100_pci_gart_fini(struct radeon_device *rdev)
 {
+   radeon_gart_fini(rdev);
r100_pci_gart_disable(rdev);
radeon_gart_table_ram_free(rdev);
-   radeon_gart_fini(rdev);
 }
 
 int r100_irq_set(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 6a0b550..756bd4d 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -170,9 +170,9 @@ void rv370_pcie_gart_disable(struct radeon_device *rdev)
 
 void rv370_pcie_gart_fini(struct radeon_device *rdev)
 {
+   radeon_gart_fini(rdev);
rv370_pcie_gart_disable(rdev);
radeon_gart_table_vram_free(rdev);
-   radeon_gart_fini(rdev);
 }
 
 void r300_fence_ring_emit(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 3df2ab1..a948947 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -492,9 +492,9 @@ void r600_pcie_gart_disable(struct radeon_device *rdev)
 
 void r600_pcie_gart_fini(struct radeon_device *rdev)
 {
+   radeon_gart_fini(rdev);
r600_pcie_gart_disable(rdev);
radeon_gart_table_vram_free(rdev);
-   radeon_gart_fini(rdev);
 }
 
 void r600_agp_enable(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index a9e06b0..e8065bd 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -203,9 +203,9 @@ void rs400_gart_disable(struct radeon_device *rdev)
 
 void rs400_gart_fini(struct radeon_device *rdev)
 {
+   radeon_gart_fini(rdev);
rs400_gart_disable(rdev);
radeon_gart_table_ram_free(rdev);
-   radeon_gart_fini(rdev);
 }
 
 int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 5c73c0f..7d8ae42 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -340,9 +340,9 @@ void rs600_gart_disable(struct radeon_device *rdev)
 
 void rs600_gart_fini(struct radeon_device *rdev)
 {
+   radeon_gart_fini(rdev);
rs600_gart_disable(rdev);
radeon_gart_table_vram_free(rdev);
-   radeon_gart_fini(rdev);
 }
 
 #define R600_PTE_VALID (1  0)
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 3f477e7..2b8a4e1 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -126,9 +126,9 @@ void rv770_pcie_gart_disable(struct radeon_device *rdev)
 
 void rv770_pcie_gart_fini(struct radeon_device *rdev)
 {
+   radeon_gart_fini(rdev);
rv770_pcie_gart_disable(rdev);
radeon_gart_table_vram_free(rdev);
-   radeon_gart_fini(rdev);
 }
 
 
-- 
1.6.6.1


--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: Unmappable VRAM patchset V4

2010-03-17 Thread Jerome Glisse
On Wed, Mar 17, 2010 at 02:01:47PM +0100, Thomas Hellstrom wrote:
 Jerome Glisse wrote:
 On Mon, Mar 01, 2010 at 01:03:38PM +0100, Thomas Hellstrom wrote:
 Dave Airlie wrote:
 On Fri, Feb 26, 2010 at 3:01 AM, Jerome Glisse jgli...@redhat.com wrote:
 Updated patchset, to apply cleanly on top of TTM split no_wait argument.
 Compile tested for nouveau+vmwgfx, test in progress for radeon.
 
 So with the new change radeon won't wait for bo reserving other bo
 in fault path but will wait the GPU (hoping it doesn't lockup ;))
 This should address concern about the wait/locking issue.
 Thomas any time for this yet? I'd like to pull this in obviously, but
 it would be nice to know if Jerome has addressed all concerns.
 
 Dave.
 Hi Dave!
 My schedule is currently a bit tight. I think the immediate
 deadlock concerns are met, but I'd to take a deeper look at some
 things that look a bit suspicious, but I think the overall
 approach is ok.
 I'll hopefully be able to do a review on wednesday.
 
 /Thomas
 
 
 Thomas any chance to review this ? NVidia patch already need update
 and i would like to avoid having this bitrot too much.
 
 Cheers,
 Jerome
 Jerome,
 
 I've reviewed the TTM patch, see previous mail. I'll look at the
 vmware patch and briefly the other ones as soon as we've sorted out
 how to address the issues raised in the review.
 
 Thanks,
 /Thomas


Will redo the patch tomorrow to try to address the issues you pointed out.

Cheers,
Jerome

--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH] drm/radeon/kms: fix typo in r520 asic functions

2010-03-16 Thread Jerome Glisse
This will fix suspend/resume on r520 asic.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_asic.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_asic.c 
b/drivers/gpu/drm/radeon/radeon_asic.c
index 3e40bc4..a5be6e0 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -480,8 +480,8 @@ static struct radeon_asic rv515_asic = {
 static struct radeon_asic r520_asic = {
.init = r520_init,
.fini = rv515_fini,
-   .suspend = rs600_suspend,
-   .resume = rs600_resume,
+   .suspend = rv515_suspend,
+   .resume = r520_resume,
.vga_set_state = r100_vga_set_state,
.gpu_is_lockup = r300_gpu_is_lockup,
.asic_reset = rs600_asic_reset,
-- 
1.6.6.1


--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 0/5] clean up radeon_asic.h v2

2010-03-12 Thread Jerome Glisse
On Thu, Mar 11, 2010 at 10:19:13PM +0100, Daniel Vetter wrote:
 Hi all,
 
 All new patch pile to make radeon_asic.h into a real header file. Now all
 the asic structs are gathered in the new radeon_asic.c file.
 
 Tested on my rv570.
 
 I've also added a new patch that gathers all r100 specific declarations
 into radeon_asic.h (at least where it makes sense). This is just an example
 to convince Jerome that radeon_asic.h might not be totally useless ;)
 
 Again, comments higly welcome.
 
 Yours, Daniel
 

I would merge patches 1 & 2 into a single patch. Also, I think you could
include radeon_asic.h at the top of radeon.h so every file would also
include radeon_asic.h; that would be simpler than adding the include
to each file, and it would reduce the likelihood of forgetting to do so
in the future. (You might need to add a forward declaration like
struct radeon; at the top of radeon_asic.h — no biggie though.)

Cheers,
Jerome

--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 00/14] cleanup radeon_asic.h

2010-03-11 Thread Jerome Glisse
On Thu, Mar 11, 2010 at 02:06:02PM +0100, Daniel Vetter wrote:
 Hi all,
 
 This patch pile moves the static struct radeon_asic asic definitions
 form radeon_asic.h into the asic-specific files, where I think they belong.
 This way radeon_asic.h becomes a real header file that can be #included.
 And indeed, with all the copypasting of function declarations, one has
 gotten out of sync.
 
 The next step would be to collect asic specific declarations in
 radeon_asic.h - atm they are somewhat scattered. But this can easily be
 done on the go and has way too much potential for conflicts with other
 patches. So I didn't do this.
 
 Tested on my rv570.
 
 Comments higly welcome.
 
 Yours, Daniel

It all looks good from a quick read-through of the patches. For
gathering the asic function prototypes, I kind of started adding them
to radeon.h at the bottom (there is already a bunch of them). Thus
I think radeon_asic.h can be killed and the extern declarations put
directly into radeon_asic.c.

Cheers,
Jerome

 
 Daniel Vetter (14):
   drm/radoen: move r100 asic struct to r100.c
   drm/radoen: move r200 asic struct to r200.c
   drm/radeon: move r300 asic structs to r300.c
   drm/radeon: move r420 asic struct to r420.c
   drm/radoen: move rs400 asic struct to rs400.c
   drm/radoen: move rs600 asic struct to rs600.c
   drm/radoen: move rs690 asic struct to rs690.c
   drm/radoen: move rv515 asic struct to rv515.c
   drm/radoen: move r520 asic struct to r520.c
   drm/radoen: move r600 asic struct to r600.c
   drm/radoen: move rv770 asic struct to rv770.c
   drm/radoen: move evergreen asic struct to evergreen.c
   drm/radoen: unconfuse return value of radeon_asic-clear_surface_reg
   drm/radeon: include radeon_asic.h in asic.c
 
  drivers/gpu/drm/radeon/evergreen.c   |   39 +++-
  drivers/gpu/drm/radeon/r100.c|   39 +++
  drivers/gpu/drm/radeon/r200.c|   38 +++
  drivers/gpu/drm/radeon/r300.c|   76 ++
  drivers/gpu/drm/radeon/r420.c|   39 +++
  drivers/gpu/drm/radeon/r520.c|   39 +++
  drivers/gpu/drm/radeon/r600.c|   43 +++-
  drivers/gpu/drm/radeon/radeon.h  |3 +-
  drivers/gpu/drm/radeon/radeon_asic.h |  494 
 ++
  drivers/gpu/drm/radeon/rs400.c   |   39 +++
  drivers/gpu/drm/radeon/rs600.c   |   43 +++-
  drivers/gpu/drm/radeon/rs690.c   |   39 +++
  drivers/gpu/drm/radeon/rv515.c   |   41 +++-
  drivers/gpu/drm/radeon/rv770.c   |   42 +++-
  14 files changed, 518 insertions(+), 496 deletions(-)
 
 
 --
 Download Intel#174; Parallel Studio Eval
 Try the new software tools for yourself. Speed compiling, find bugs
 proactively, and fine-tune applications for parallel performance.
 See why Intel Parallel Studio got high marks during beta.
 http://p.sf.net/sfu/intel-sw-dev
 --
 ___
 Dri-devel mailing list
 Dri-devel@lists.sourceforge.net
 https://lists.sourceforge.net/lists/listinfo/dri-devel
 

--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 00/14] cleanup radeon_asic.h

2010-03-11 Thread Jerome Glisse
On Thu, Mar 11, 2010 at 05:24:22PM +0100, Rafał Miłecki wrote:
 2010/3/11 Alex Deucher alexdeuc...@gmail.com:
  I like keeping all the asic definitions in one file as you tend to
  need to update them all at one time and having them spread across all
  the asic files increases the likelihood of one or more of them getting
  missed.  But I can live with it if other folks think it's a good idea.
 
 Same here. One file means easier editing. Maybe we could use some
 other of proposed tricks?
 
 -- 
 Rafał

I don't have a strong feeling, but Alex has a point: right now we often
update them. Maybe we should add radeon_asic.c and move the asic init
(functions now in radeon_device.c) there along with the structures.

Cheers,
Jerome

--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 1/3] drm/radeon/kms: fence cleanup + more reliable GPU lockup detection V4

2010-03-09 Thread Jerome Glisse
This patch cleans up the fence code; it drops the timeout field of
the fence, as the time to complete each IB is unpredictable and
shouldn't be bounded.

The fence cleanup leads to a GPU lockup detection improvement: this
patch introduces a callback allowing asic-specific tests for
lockup detection. In this patch the CP is used as a first indicator
of GPU lockup. If the CP doesn't make progress during 1 second, we
assume we are facing a GPU lockup.

To avoid the overhead of testing for GPU lockup frequently (due to
fences taking time to be signaled), we query the lockup callback every
500 msec. There are plenty of code comments explaining the design
choices inside the code.

This has been tested mostly on R3XX/R5XX hw; in a normal running
desktop session (compiz, firefox, quake3 running) the lockup callback
wasn't called once (1 hour session). Also tested with a forced GPU
lockup; the lockup was reported after the 1s CP activity timeout.

V2 switch to 500ms timeout so GPU lockup get call at least 2 times
   in less than 2sec.
V3 store last jiffies in fence struct so on ERESTART, EBUSY we keep
   track of how long we already wait for a given fence
V4 make sure we got up to date cp read pointer so we don't have
   false positive

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/evergreen.c|6 ++
 drivers/gpu/drm/radeon/r100.c |   86 +++
 drivers/gpu/drm/radeon/r300.c |   28 -
 drivers/gpu/drm/radeon/r600.c |   34 ++-
 drivers/gpu/drm/radeon/radeon.h   |  104 +++--
 drivers/gpu/drm/radeon/radeon_asic.h  |   20 ++-
 drivers/gpu/drm/radeon/radeon_fence.c |  102 +---
 drivers/gpu/drm/radeon/rv770.c|6 --
 8 files changed, 280 insertions(+), 106 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index bd2e7aa..8988df7 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -490,6 +490,12 @@ int evergreen_mc_init(struct radeon_device *rdev)
return 0;
 }
 
+bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
+{
+   /* FIXME: implement for evergreen */
+   return false;
+}
+
 int evergreen_gpu_reset(struct radeon_device *rdev)
 {
/* FIXME: implement for evergreen */
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 91eb762..e4487f3 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1772,6 +1772,92 @@ int r100_rb2d_reset(struct radeon_device *rdev)
return -1;
 }
 
+void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp 
*cp)
+{
+   lockup-last_cp_rptr = cp-rptr;
+   lockup-last_jiffies = jiffies;
+}
+
+/**
+ * r100_gpu_cp_is_lockup() - check if CP is lockup by recording information
+ * @rdev:  radeon device structure
+ * @lockup:r100_gpu_lockup structure holding CP lockup tracking 
informations
+ * @cp:radeon_cp structure holding CP information
+ *
+ * We don't need to initialize the lockup tracking information as we will 
either
+ * have CP rptr to a different value of jiffies wrap around which will force
+ * initialization of the lockup tracking informations.
+ *
+ * A possible false positivie is if we get call after while and last_cp_rptr ==
+ * the current CP rptr, even if it's unlikely it might happen. To avoid this
+ * if the elapsed time since last call is bigger than 2 second than we return
+ * false and update the tracking information. Due to this the caller must call
+ * r100_gpu_cp_is_lockup several time in less than 2sec for lockup to be 
reported
+ * the fencing code should be cautious about that.
+ *
+ * Caller should write to the ring to force CP to do something so we don't get
+ * false positive when CP is just gived nothing to do.
+ *
+ **/
+bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup 
*lockup, struct radeon_cp *cp)
+{
+   unsigned long cjiffies, elapsed;
+
+   cjiffies = jiffies;
+   if (!time_after(cjiffies, lockup-last_jiffies)) {
+   /* likely a wrap around */
+   lockup-last_cp_rptr = cp-rptr;
+   lockup-last_jiffies = jiffies;
+   return false;
+   }
+   if (cp-rptr != lockup-last_cp_rptr) {
+   /* CP is still working no lockup */
+   lockup-last_cp_rptr = cp-rptr;
+   lockup-last_jiffies = jiffies;
+   return false;
+   }
+   elapsed = jiffies_to_msecs(cjiffies - lockup-last_jiffies);
+   if (elapsed = 3000) {
+   /* very likely the improbable case where current
+* rptr is equal to last recorded, a while ago, rptr
+* this is more likely a false positive update tracking
+* information which should force us to be recall at
+* latter point
+*/
+   lockup-last_cp_rptr = cp

[PATCH 3/3] drm/radeon/kms: simplify improve GPU reset V2

2010-03-09 Thread Jerome Glisse
This simplifies and improves GPU reset for R1XX-R6XX hw; it's
not 100% reliable. Here are the results:
- R1XX/R2XX works a bunch of times in a row; sometimes it
  seems it can work indefinitely
- R3XX/R4XX are the most unreliable ones; sometimes you will be
  able to reset a few times, sometimes not even once
- R5XX is more reliable than previous hw; it seems to work most
  of the time, but once in a while it fails for no obvious
  reason (same status as a previous reset, just not the same
  happy ending)
- R6XX/R7XX are a lot more reliable with this patch; still
  it seems that it can fail after a bunch (resetting every
  2 sec for 3 hours brings down the GPU & computer)

This has been tested on various hw; for some odd reason I wasn't
able to lock up RS480/RS690 (while they used to love locking up).

Note that on R1XX-R5XX the cursor will disappear after a lockup
(haven't checked why); switching to the console and back to X will
restore the cursor.

The next step is to record the bogus command that led to
the lockup.

V2 Fix r6xx resume path to avoid reinitializing blit
module, use the gpu_lockup boolean to avoid entering
inifinite waiting loop on fence while reiniting the GPU

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/r100.c  |  180 +++
 drivers/gpu/drm/radeon/r100d.h |  128 ++
 drivers/gpu/drm/radeon/r300.c  |  134 +++-
 drivers/gpu/drm/radeon/r300d.h |   47 -
 drivers/gpu/drm/radeon/r520.c  |1 -
 drivers/gpu/drm/radeon/r600.c  |   53 +-
 drivers/gpu/drm/radeon/r600_blit_kms.c |3 +
 drivers/gpu/drm/radeon/radeon.h|4 +-
 drivers/gpu/drm/radeon/radeon_asic.h   |   12 +-
 drivers/gpu/drm/radeon/radeon_cs.c |4 -
 drivers/gpu/drm/radeon/radeon_device.c |   22 
 drivers/gpu/drm/radeon/radeon_fence.c  |   13 ++-
 drivers/gpu/drm/radeon/radeon_gart.c   |2 +-
 drivers/gpu/drm/radeon/rs400.c |2 -
 drivers/gpu/drm/radeon/rs600.c |   73 +-
 drivers/gpu/drm/radeon/rs600d.h|   46 
 drivers/gpu/drm/radeon/rs690.c |2 -
 drivers/gpu/drm/radeon/rv515.c |   90 
 drivers/gpu/drm/radeon/rv515d.h|   46 
 19 files changed, 508 insertions(+), 354 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 5594e71..a57939a 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -659,26 +659,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned 
ring_size)
if (r100_debugfs_cp_init(rdev)) {
DRM_ERROR(Failed to register debugfs file for CP !\n);
}
-   /* Reset CP */
-   tmp = RREG32(RADEON_CP_CSQ_STAT);
-   if ((tmp  (1  31))) {
-   DRM_INFO(radeon: cp busy (0x%08X) resetting\n, tmp);
-   WREG32(RADEON_CP_CSQ_MODE, 0);
-   WREG32(RADEON_CP_CSQ_CNTL, 0);
-   WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
-   tmp = RREG32(RADEON_RBBM_SOFT_RESET);
-   mdelay(2);
-   WREG32(RADEON_RBBM_SOFT_RESET, 0);
-   tmp = RREG32(RADEON_RBBM_SOFT_RESET);
-   mdelay(2);
-   tmp = RREG32(RADEON_CP_CSQ_STAT);
-   if ((tmp  (1  31))) {
-   DRM_INFO(radeon: cp reset failed (0x%08X)\n, tmp);
-   }
-   } else {
-   DRM_INFO(radeon: cp idle (0x%08X)\n, tmp);
-   }
-
if (!rdev-me_fw) {
r = r100_cp_init_microcode(rdev);
if (r) {
@@ -781,39 +761,6 @@ void r100_cp_disable(struct radeon_device *rdev)
}
 }
 
-int r100_cp_reset(struct radeon_device *rdev)
-{
-   uint32_t tmp;
-   bool reinit_cp;
-   int i;
-
-   reinit_cp = rdev-cp.ready;
-   rdev-cp.ready = false;
-   WREG32(RADEON_CP_CSQ_MODE, 0);
-   WREG32(RADEON_CP_CSQ_CNTL, 0);
-   WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
-   (void)RREG32(RADEON_RBBM_SOFT_RESET);
-   udelay(200);
-   WREG32(RADEON_RBBM_SOFT_RESET, 0);
-   /* Wait to prevent race in RBBM_STATUS */
-   mdelay(1);
-   for (i = 0; i  rdev-usec_timeout; i++) {
-   tmp = RREG32(RADEON_RBBM_STATUS);
-   if (!(tmp  (1  16))) {
-   DRM_INFO(CP reset succeed (RBBM_STATUS=0x%08X)\n,
-tmp);
-   if (reinit_cp) {
-   return r100_cp_init(rdev, rdev-cp.ring_size);
-   }
-   return 0;
-   }
-   DRM_UDELAY(1);
-   }
-   tmp = RREG32(RADEON_RBBM_STATUS);
-   DRM_ERROR(Failed to reset CP (RBBM_STATUS=0x%08X)!\n, tmp);
-   return -1;
-}
-
 void r100_cp_commit(struct radeon_device *rdev)
 {
WREG32(RADEON_CP_RB_WPTR, rdev-cp.wptr);
@@ -1727,51 +1674,6 @@ int r100_mc_wait_for_idle(struct radeon_device

[PATCH 2/3] drm/radeon/kms: rename gpu_reset to asic_reset

2010-03-09 Thread Jerome Glisse
This patch renames gpu_reset to asic_reset in anticipation of having
gpu_reset do more than just a basic asic reset.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/evergreen.c |2 +-
 drivers/gpu/drm/radeon/r100.c  |6 ++--
 drivers/gpu/drm/radeon/r300.c  |6 ++--
 drivers/gpu/drm/radeon/r420.c  |4 +-
 drivers/gpu/drm/radeon/r520.c  |4 +-
 drivers/gpu/drm/radeon/r600.c  |2 +-
 drivers/gpu/drm/radeon/radeon.h|6 ++--
 drivers/gpu/drm/radeon/radeon_asic.h   |   36 
 drivers/gpu/drm/radeon/radeon_device.c |2 +-
 drivers/gpu/drm/radeon/radeon_fence.c  |2 +-
 drivers/gpu/drm/radeon/rs400.c |4 +-
 drivers/gpu/drm/radeon/rs600.c |4 +-
 drivers/gpu/drm/radeon/rs690.c |4 +-
 drivers/gpu/drm/radeon/rv515.c |8 +++---
 14 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 8988df7..f1a860c 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -496,7 +496,7 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
return false;
 }
 
-int evergreen_gpu_reset(struct radeon_device *rdev)
+int evergreen_asic_reset(struct radeon_device *rdev)
 {
/* FIXME: implement for evergreen */
return 0;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index e4487f3..5594e71 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1858,7 +1858,7 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev)
return r100_gpu_cp_is_lockup(rdev, rdev-config.r100.lockup, 
rdev-cp);
 }
 
-int r100_gpu_reset(struct radeon_device *rdev)
+int r100_asic_reset(struct radeon_device *rdev)
 {
uint32_t status;
 
@@ -3500,7 +3500,7 @@ int r100_resume(struct radeon_device *rdev)
/* Resume clock before doing reset */
r100_clock_startup(rdev);
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev, GPU reset failed ! (0xE40=0x%08X, 
0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
RREG32(R_0007C0_CP_STAT));
@@ -3568,7 +3568,7 @@ int r100_init(struct radeon_device *rdev)
return r;
}
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev,
GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 23346f6..75ab7b0 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -447,7 +447,7 @@ bool r300_gpu_is_lockup(struct radeon_device *rdev)
return r100_gpu_cp_is_lockup(rdev, rdev-config.r300.lockup, 
rdev-cp);
 }
 
-int r300_gpu_reset(struct radeon_device *rdev)
+int r300_asic_reset(struct radeon_device *rdev)
 {
uint32_t status;
 
@@ -1330,7 +1330,7 @@ int r300_resume(struct radeon_device *rdev)
/* Resume clock before doing reset */
r300_clock_startup(rdev);
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev, GPU reset failed ! (0xE40=0x%08X, 
0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
RREG32(R_0007C0_CP_STAT));
@@ -1400,7 +1400,7 @@ int r300_init(struct radeon_device *rdev)
return r;
}
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev,
GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index c7593b8..3221855 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -233,7 +233,7 @@ int r420_resume(struct radeon_device *rdev)
/* Resume clock before doing reset */
r420_clock_resume(rdev);
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev, GPU reset failed ! (0xE40=0x%08X, 
0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
RREG32(R_0007C0_CP_STAT));
@@ -313,7 +313,7 @@ int r420_init(struct radeon_device *rdev)
}
}
/* Reset gpu before posting

Improved GPU reset

2010-03-09 Thread Jerome Glisse
This series of patches fixes the shortcomings of the previous one;
there shouldn't be any more false positives, and resume shouldn't lead
to an infinite loop or other reinitialization issues on r6xx/r7xx.

Cheers,
Jerome


--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: Improved GPU reset

2010-03-08 Thread Jerome Glisse
On Sun, Mar 07, 2010 at 10:41:32AM +0100, Ladislav Kunc wrote:
 On Friday 05 March 2010 11:30:02 Jerome Glisse wrote:
  This patches improve the GPU reset, many time i able to successfully
  reset the GPU and carry on operation, note that after a reset you
  will likely see corrpution on the screen. Hope is that we should now
  be able to capture faulty command stream.
  
  I still need to do a full retesting with this patch (especialy
  suspend/resume).
  
 
 Hi Jerome,
 
 after application of all of your 3 patches, I got GPU softresets in log from 
 time to time and sometimes also hard locks. Is there a way how to capture the 
 faulty command to log? How to turn on some debug logging?
 
 I use Mobility Radeon HD3450. And the GPU lockups occur mostly during window 
 manipulation in KDE (both compositing/without compositing).
 
 Best regards,
 Ladislav
 

By hard locks you mean you can't ssh into the computer? There is a bug in
my patches which leads to a mutex/irqlock deadlock; I don't understand it
yet, but it can happen and might explain your hard lock. Once I've fixed
those I will send new versions of the patches.

I plan to later add infrastructure to capture the faulty instruction, but
this isn't done yet and will likely need a huge pile of code.

Cheers,
Jerome

--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Improved GPU reset

2010-03-05 Thread Jerome Glisse
These patches improve GPU reset; many times I was able to successfully
reset the GPU and carry on operation. Note that after a reset you
will likely see corruption on the screen. The hope is that we should
now be able to capture the faulty command stream.

I still need to do full retesting with this patch (especially
suspend/resume).


--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 2/3] drm/radeon/kms: rename gpu_reset to asic_reset

2010-03-05 Thread Jerome Glisse
This patch renames gpu_reset to asic_reset in anticipation of having
gpu_reset do more than just a basic asic reset.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/evergreen.c |2 +-
 drivers/gpu/drm/radeon/r100.c  |6 ++--
 drivers/gpu/drm/radeon/r300.c  |6 ++--
 drivers/gpu/drm/radeon/r420.c  |4 +-
 drivers/gpu/drm/radeon/r520.c  |4 +-
 drivers/gpu/drm/radeon/r600.c  |2 +-
 drivers/gpu/drm/radeon/radeon.h|6 ++--
 drivers/gpu/drm/radeon/radeon_asic.h   |   36 
 drivers/gpu/drm/radeon/radeon_device.c |2 +-
 drivers/gpu/drm/radeon/radeon_fence.c  |2 +-
 drivers/gpu/drm/radeon/rs400.c |4 +-
 drivers/gpu/drm/radeon/rs600.c |4 +-
 drivers/gpu/drm/radeon/rs690.c |4 +-
 drivers/gpu/drm/radeon/rv515.c |8 +++---
 14 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 8988df7..f1a860c 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -496,7 +496,7 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
return false;
 }
 
-int evergreen_gpu_reset(struct radeon_device *rdev)
+int evergreen_asic_reset(struct radeon_device *rdev)
 {
/* FIXME: implement for evergreen */
return 0;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 96a6370..f5b46a9 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1856,7 +1856,7 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev)
return r100_gpu_cp_is_lockup(rdev, rdev-config.r100.lockup, 
rdev-cp);
 }
 
-int r100_gpu_reset(struct radeon_device *rdev)
+int r100_asic_reset(struct radeon_device *rdev)
 {
uint32_t status;
 
@@ -3498,7 +3498,7 @@ int r100_resume(struct radeon_device *rdev)
/* Resume clock before doing reset */
r100_clock_startup(rdev);
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev, GPU reset failed ! (0xE40=0x%08X, 
0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
RREG32(R_0007C0_CP_STAT));
@@ -3566,7 +3566,7 @@ int r100_init(struct radeon_device *rdev)
return r;
}
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev,
GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 08b79c0..fd162f0 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -446,7 +446,7 @@ bool r300_gpu_is_lockup(struct radeon_device *rdev)
return r100_gpu_cp_is_lockup(rdev, rdev-config.r300.lockup, 
rdev-cp);
 }
 
-int r300_gpu_reset(struct radeon_device *rdev)
+int r300_asic_reset(struct radeon_device *rdev)
 {
uint32_t status;
 
@@ -1329,7 +1329,7 @@ int r300_resume(struct radeon_device *rdev)
/* Resume clock before doing reset */
r300_clock_startup(rdev);
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev, GPU reset failed ! (0xE40=0x%08X, 
0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
RREG32(R_0007C0_CP_STAT));
@@ -1399,7 +1399,7 @@ int r300_init(struct radeon_device *rdev)
return r;
}
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev,
GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index c7593b8..3221855 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -233,7 +233,7 @@ int r420_resume(struct radeon_device *rdev)
/* Resume clock before doing reset */
r420_clock_resume(rdev);
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev, GPU reset failed ! (0xE40=0x%08X, 
0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
RREG32(R_0007C0_CP_STAT));
@@ -313,7 +313,7 @@ int r420_init(struct radeon_device *rdev)
}
}
/* Reset gpu before posting

[PATCH 3/3] drm/radeon/kms: simplify improve GPU reset

2010-03-05 Thread Jerome Glisse
This simplifies and improves GPU reset for R1XX-R6XX hw; it's
not 100% reliable. Here are the results:
- R1XX/R2XX works a bunch of times in a row; sometimes it
  seems it can work indefinitely
- R3XX/R4XX are the most unreliable ones; sometimes you will be
  able to reset a few times, sometimes not even once
- R5XX is more reliable than previous hw; it seems to work most
  of the time, but once in a while it fails for no obvious
  reason (same status as a previous reset, just not the same
  happy ending)
- R6XX/R7XX are a lot more reliable with this patch; still
  it seems that it can fail after a bunch (resetting every
  2 sec for 3 hours brings down the GPU & computer)

This has been tested on various hw; for some odd reason I wasn't
able to lock up RS480/RS690 (while they used to love locking up).

Note that on R1XX-R5XX the cursor will disappear after a lockup
(haven't checked why); switching to the console and back to X will
restore the cursor.

The next step is to record the bogus command that led to
the lockup.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/r100.c  |  180 +++
 drivers/gpu/drm/radeon/r100d.h |  128 ++
 drivers/gpu/drm/radeon/r300.c  |  134 +++-
 drivers/gpu/drm/radeon/r300d.h |   47 -
 drivers/gpu/drm/radeon/r520.c  |1 -
 drivers/gpu/drm/radeon/r600.c  |   53 +-
 drivers/gpu/drm/radeon/radeon.h|4 +-
 drivers/gpu/drm/radeon/radeon_asic.h   |   12 +-
 drivers/gpu/drm/radeon/radeon_device.c |   20 
 drivers/gpu/drm/radeon/radeon_fence.c  |5 +-
 drivers/gpu/drm/radeon/radeon_gart.c   |4 +
 drivers/gpu/drm/radeon/rs400.c |2 -
 drivers/gpu/drm/radeon/rs600.c |   73 +-
 drivers/gpu/drm/radeon/rs600d.h|   46 
 drivers/gpu/drm/radeon/rs690.c |2 -
 drivers/gpu/drm/radeon/rv515.c |   90 
 drivers/gpu/drm/radeon/rv515d.h|   46 
 17 files changed, 501 insertions(+), 346 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index f5b46a9..91e3b57 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -659,26 +659,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned 
ring_size)
if (r100_debugfs_cp_init(rdev)) {
DRM_ERROR(Failed to register debugfs file for CP !\n);
}
-   /* Reset CP */
-   tmp = RREG32(RADEON_CP_CSQ_STAT);
-   if ((tmp  (1  31))) {
-   DRM_INFO(radeon: cp busy (0x%08X) resetting\n, tmp);
-   WREG32(RADEON_CP_CSQ_MODE, 0);
-   WREG32(RADEON_CP_CSQ_CNTL, 0);
-   WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
-   tmp = RREG32(RADEON_RBBM_SOFT_RESET);
-   mdelay(2);
-   WREG32(RADEON_RBBM_SOFT_RESET, 0);
-   tmp = RREG32(RADEON_RBBM_SOFT_RESET);
-   mdelay(2);
-   tmp = RREG32(RADEON_CP_CSQ_STAT);
-   if ((tmp  (1  31))) {
-   DRM_INFO(radeon: cp reset failed (0x%08X)\n, tmp);
-   }
-   } else {
-   DRM_INFO(radeon: cp idle (0x%08X)\n, tmp);
-   }
-
if (!rdev-me_fw) {
r = r100_cp_init_microcode(rdev);
if (r) {
@@ -781,39 +761,6 @@ void r100_cp_disable(struct radeon_device *rdev)
}
 }
 
-int r100_cp_reset(struct radeon_device *rdev)
-{
-   uint32_t tmp;
-   bool reinit_cp;
-   int i;
-
-   reinit_cp = rdev-cp.ready;
-   rdev-cp.ready = false;
-   WREG32(RADEON_CP_CSQ_MODE, 0);
-   WREG32(RADEON_CP_CSQ_CNTL, 0);
-   WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
-   (void)RREG32(RADEON_RBBM_SOFT_RESET);
-   udelay(200);
-   WREG32(RADEON_RBBM_SOFT_RESET, 0);
-   /* Wait to prevent race in RBBM_STATUS */
-   mdelay(1);
-   for (i = 0; i  rdev-usec_timeout; i++) {
-   tmp = RREG32(RADEON_RBBM_STATUS);
-   if (!(tmp  (1  16))) {
-   DRM_INFO(CP reset succeed (RBBM_STATUS=0x%08X)\n,
-tmp);
-   if (reinit_cp) {
-   return r100_cp_init(rdev, rdev-cp.ring_size);
-   }
-   return 0;
-   }
-   DRM_UDELAY(1);
-   }
-   tmp = RREG32(RADEON_RBBM_STATUS);
-   DRM_ERROR(Failed to reset CP (RBBM_STATUS=0x%08X)!\n, tmp);
-   return -1;
-}
-
 void r100_cp_commit(struct radeon_device *rdev)
 {
WREG32(RADEON_CP_RB_WPTR, rdev-cp.wptr);
@@ -1727,51 +1674,6 @@ int r100_mc_wait_for_idle(struct radeon_device *rdev)
return -1;
 }
 
-void r100_gpu_init(struct radeon_device *rdev)
-{
-   /* TODO: anythings to do here ? pipes ? */
-   r100_hdp_reset(rdev);
-}
-
-void r100_hdp_reset(struct radeon_device *rdev)
-{
-   uint32_t tmp;
-
-   tmp = RREG32

Re: [git pull] drm request 3

2010-03-05 Thread Jerome Glisse
On Fri, Mar 05, 2010 at 04:31:29PM +, Alan Cox wrote:
 On Fri, 05 Mar 2010 08:06:26 -0800 (PST)
 David Miller da...@davemloft.net wrote:
 
  From: Daniel Stone dan...@fooishbar.org
  Date: Fri, 5 Mar 2010 18:04:34 +0200
  
   So you're saying that there's no way to develop any reasonable body of
   code for the Linux kernel without committing to keeping your ABI
   absolutely rock-solid stable for eternity, no exceptions, ever? Cool,
   that worked really well for Xlib.
  
  read() still works the same way it did 30 years ago last time I
  checked.
 
 Thats disingenous as read() is a method not an interface. It's also wrong
 because read() and write() behaviour has changed in various ways and old
 code broke because of it in subtle ways. Keeping the same method behaviour
 would have required things like new versions of read() for 64bit files,
 nonblocking, mandlocks, NFS, networking, etc all of which changed the
 core read() behaviour. I've yet to see anyone meaningfully argue it was
 the wrong thing to do.
 
 Alan
 

Also, the GPU API is way more complex than any other kernel API
(at least to my knowledge) and you can't know if the API you
have is the right one until you have a fully working & fast
3D driver ... and that takes either a lot of time or
a lot of people.

Cheers,
Jerome

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: TTM split no_wait argument in 2 (no_wait_reserve, no_wait_gpu)

2010-03-04 Thread Jerome Glisse
On Thu, Feb 25, 2010 at 05:50:10PM +0100, Jerome Glisse wrote:
 This patch change the TTM API to allow driver to select btw choosing
 to wait or not either for bo reserve or GPU wait separately. This is
 needed for the unmappabled VRAM work.
 
 Comments ?
 
 Cheers,
 Jerome

Thomas any chance you review both this change and the iomap callback
change ? The merge window is closing soon.

Cheers,
Jerome

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [RFC] drm/ttm: add pool wc/uc page allocator

2010-03-03 Thread Jerome Glisse
On Wed, Mar 03, 2010 at 05:46:43PM +0200, Pauli Nieminen wrote:
 On Wed, Mar 3, 2010 at 4:50 PM, Jerome Glisse gli...@freedesktop.org wrote:
  On Wed, Mar 03, 2010 at 04:23:08PM +0200, Pauli Nieminen wrote:
  On Wed, Mar 3, 2010 at 3:33 PM, Jerome Glisse gli...@freedesktop.org 
  wrote:
   On Tue, Mar 02, 2010 at 12:32:54AM +0200, Pauli Nieminen wrote:
   On Sun, Feb 28, 2010 at 11:30 PM, Pauli Nieminen suok...@gmail.com 
   wrote:
On AGP system we might allocate/free routinely uncached or wc memory,
changing page from cached (wb) to uc or wc is very expensive and 
involves
a lot of flushing. To improve performance this allocator use a pool
of uc,wc pages.
   
Pools are linked lists of pages, ordered so that the first is the latest
addition
to the pool and the last is the oldest page. Old pages are periodically
freed from
pools to keep memory use at a minimum.
   
Pools are protected with spinlocks to allow multiple threads to
allocate pages
simultaneously. Expensive operations must not hold the spinlock, to
maximise
performance for multiple threads.
   
Based on Jerome Glisse's and Dave Airlie's pool allocator.
   
Signed-off-by: Jerome Glisse jgli...@redhat.com
Signed-off-by: Dave Airlie airl...@redhat.com
Signed-off-by: Pauli Nieminen suok...@gmail.com
  
   I think it's overdesigned, by trying to be to clever we often endup
   with code more complex and less efficient in the end. While the idea
   to free page only after some amount of time is the way to go, i think
   a simpler approach will perform a better job. For instance:
  
   pool {
    npages // npages in pool
    page *pages[enoughttohold16Morsomethingconfigurable]
    unsigned long next_free_time;
    lock
   }
   filllocked(pool,npages)
   {
          if (npages  MAXPAGESPERPOOL)
                  npages = MAXPAGESPERPOOL;
          for i, npages : allocpage
   }
   alloc(apages, npages)
   {
          do {
          lock(pool)
          pool-next_free_time = jiffies + timeout;
          if (npages  pool-npages) {
                  npages -= pool-npages;
                  copypageptr(apages, pool-pages, pool-npages)
                  pool-npages = 0;
                  fillpool(pool, npages)
          } else {
                  pool-npages -= npages;
                  copypageptr(apages, pool-pages, npages)
                  npages = 0;
          }
          unlock(pool)
          } while (npages)
   }
   poolshrinkcallbacktimer()
   {
          for i in npools {
                  if (trylock(pool[i].lock) {
                          if (timeafter(jiffies, pool[i].nextfreetime)) {
                                  free bunch of pages for instance 1M at a 
   time
                          }
                          unlock(pool[i].lock)
                  }
          }
   }
 
  I would need to use split buffer for that big arrays. My aim was to
  have FILO queue pages so knowing which pages can be freed now is
  simple because the oldest pages is always in the head of the list. I
  know that FILO is easy to implement on top of deque but requirement to
  split underlying array sounded like more complex than using linked
  list. But as I noted in IRC. Requesting for N pages from the linked
  list is doomed to have O(N) performance which is bad. So better
  implementation would be switching to an array.
 
  The design I outlined above drops the concept of recording time per
  page, so there is no need for a FILO; you just fill the pool and you
  empty it 1M or less at a time every N msec. This is a lot simpler: no
  complex splitting or list walking. One simple optimization is to make
  the amount of pages we free dependent on how full the pool is, so
  if there is 16M of memory in the pool free 1M at a time, but if there
  is 64K then free one page at a time.
 
 
 I think that we should take into acount the dynamic memory amount that
 current user active is requiring. What if there was case ever that
 dynamic memory sizes is 20M? List based implementation is easier to
 scale to what user space is doing.
 20M isn't even much if you think how much dynamic allocations would be
 done by multiseat system (driving multiple GPUs same time).
 

You mean an app constantly allocating 20M and freeing 20M? Even with
multiple GPUs this sounds like broken userspace. Note that I don't think
there is a dual-AGP motherboard out there, but I kind of forgot about such
evil things. Also, I picked 32M randomly, as I think the best choice is
making the pool as big as the AGP aperture. Anyway, the bottom line is
that we should not try to think of every possible use case but rather
worry about the most common sensible ones.

 I also tough about the array implementation and there is also that
 O(N) cost anyway.
 
 But imporements that I think would help here:
 - Add the shrinker callback and don't use own timer.
 - Simplify the implementation a lot.
 - Maybe having pool for cached pages too would make it a lot simpler
 when all allocations would

Re: [SPAM] [PATCH 1/3] drm/radeon/kms: fence cleanup + more reliable GPU lockup detection V2

2010-03-02 Thread Jerome Glisse
On Tue, Mar 02, 2010 at 10:13:19AM +0100, Erik Andrén wrote:
 2010/3/1  y:
  From: Jerome Glisse jgli...@redhat.com
 
  This patch cleanup the fence code, it drops the timeout field of
  fence as the time to complete each IB is unpredictable and shouldn't
  be bound.
 
  The fence cleanup lead to GPU lockup detection improvement, this
  patch introduce a callback, allowing to do asic specific test for
  lockup detection. In this patch the CP is use as a first indicator
  of GPU lockup. If CP doesn't make progress during 1second we assume
  we are facing a GPU lockup.
 
  To avoid overhead of testing GPU lockup frequently due to fence
  taking time to be signaled we query the lockup callback every
  500msec. There is plenty code comment explaining the design  choise
  inside the code.
 
  This have been tested mostly on R3XX/R5XX hw, in normal running
  destkop (compiz firefox, quake3 running) the lockup callback wasn't
  call once (1 hour session). Also tested with forcing GPU lockup and
  lockup was reported after the 1s CP activity timeout.
 
  V2 switch to 500ms timeout so GPU lockup get call at least 2 times
    in less than 2sec.
 
  Signed-off-by: Jerome Glisse jgli...@redhat.com
  ---
   drivers/gpu/drm/radeon/evergreen.c    |    6 ++
   drivers/gpu/drm/radeon/r100.c         |   84 +++
   drivers/gpu/drm/radeon/r300.c         |   27 -
   drivers/gpu/drm/radeon/r600.c         |   33 +-
   drivers/gpu/drm/radeon/radeon.h       |  102 
  ++--
   drivers/gpu/drm/radeon/radeon_asic.h  |   20 ++-
   drivers/gpu/drm/radeon/radeon_fence.c |   75 +---
   drivers/gpu/drm/radeon/rv770.c        |    6 --
   8 files changed, 248 insertions(+), 105 deletions(-)
 
  diff --git a/drivers/gpu/drm/radeon/evergreen.c 
  b/drivers/gpu/drm/radeon/evergreen.c
  index bd2e7aa..8988df7 100644
  --- a/drivers/gpu/drm/radeon/evergreen.c
  +++ b/drivers/gpu/drm/radeon/evergreen.c
  @@ -490,6 +490,12 @@ int evergreen_mc_init(struct radeon_device *rdev)
         return 0;
   }
 
  +bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
  +{
  +       /* FIXME: implement for evergreen */
  +       return false;
  +}
  +
   int evergreen_gpu_reset(struct radeon_device *rdev)
   {
         /* FIXME: implement for evergreen */
  diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
  index 91eb762..96a6370 100644
  --- a/drivers/gpu/drm/radeon/r100.c
  +++ b/drivers/gpu/drm/radeon/r100.c
  @@ -1772,6 +1772,90 @@ int r100_rb2d_reset(struct radeon_device *rdev)
         return -1;
   }
 
  +void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct 
  radeon_cp *cp)
  +{
  +       lockup-last_cp_rptr = cp-rptr;
  +       lockup-last_jiffies = jiffies;
  +}
  +
  +/**
  + * r100_gpu_cp_is_lockup() - check if CP is lockup by recording information
  + * @rdev:      radeon device structure
  + * @lockup:    r100_gpu_lockup structure holding CP lockup tracking 
  informations
  + * @cp:                radeon_cp structure holding CP information
  + *
  + * We don't need to initialize the lockup tracking information as we will 
  either
  + * have CP rptr to a different value of jiffies wrap around which will 
  force
  + * initialization of the lockup tracking informations.
  + *
  + * A possible false positivie is if we get call after while and 
  last_cp_rptr ==
  + * the current CP rptr, even if it's unlikely it might happen. To avoid 
  this
  + * if the elapsed time since last call is bigger than 2 second than we 
  return
  + * false and update the tracking information. Due to this the caller must 
  call
  + * r100_gpu_cp_is_lockup several time in less than 2sec for lockup to be 
  reported
  + * the fencing code should be cautious about that.
  + *
  + * Caller should write to the ring to force CP to do something so we don't 
  get
  + * false positive when CP is just gived nothing to do.
  + *
  + **/
  +bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct 
  r100_gpu_lockup *lockup, struct radeon_cp *cp)
  +{
  +       unsigned long cjiffies, elapsed;
  +
  +       cjiffies = jiffies;
  +       if (!time_after(cjiffies, lockup-last_jiffies)) {
  +               /* likely a wrap around */
  +               lockup-last_jiffies = jiffies;
  +               return false;
  +       }
  +       if (cp-rptr != lockup-last_cp_rptr) {
  +               /* CP is still working no lockup */
  +               lockup-last_cp_rptr = cp-rptr;
  +               lockup-last_jiffies = jiffies;
  +               return false;
  +       }
  +       elapsed = jiffies_to_msecs(cjiffies - lockup-last_jiffies);
  +       if (elapsed = 3000) {
  +               /* very likely the improbable case where current
  +                * rptr is equal to last recorded, a while ago, rptr
  +                * this is more likely a false positive update tracking
  +                * information which should force us to be recall

[PATCH] drm/radeon/kms: catch atombios infinite loop and break out of it

2010-03-02 Thread Jerome Glisse
In some cases the atombios code might lead to an infinite loop because
the GPU is in a broken state. This patch tracks the jump history and
will abort atombios execution if we are stuck executing the same
jump for more than 1sec. Note that otherwise, in some cases, we might
enter an infinite loop in the kernel context, which is bad.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/atom.c |   59 
 drivers/gpu/drm/radeon/atom.h |2 +-
 2 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index d75788f..b7fe660 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -52,15 +52,17 @@
 
 typedef struct {
struct atom_context *ctx;
-
uint32_t *ps, *ws;
int ps_shift;
uint16_t start;
+   unsigned last_jump;
+   unsigned long last_jump_jiffies;
+   bool abort;
 } atom_exec_context;
 
 int atom_debug = 0;
-static void atom_execute_table_locked(struct atom_context *ctx, int index, 
uint32_t * params);
-void atom_execute_table(struct atom_context *ctx, int index, uint32_t * 
params);
+static int atom_execute_table_locked(struct atom_context *ctx, int index, 
uint32_t * params);
+int atom_execute_table(struct atom_context *ctx, int index, uint32_t * params);
 
 static uint32_t atom_arg_mask[8] =
 { 0x, 0x, 0x00, 0x, 0xFF, 0xFF00, 0xFF,
@@ -604,12 +606,17 @@ static void atom_op_beep(atom_exec_context *ctx, int 
*ptr, int arg)
 static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg)
 {
int idx = U8((*ptr)++);
+   int r = 0;
+
if (idx  ATOM_TABLE_NAMES_CNT)
SDEBUG(   table: %d (%s)\n, idx, atom_table_names[idx]);
else
SDEBUG(   table: %d\n, idx);
if (U16(ctx-ctx-cmd_table + 4 + 2 * idx))
-   atom_execute_table_locked(ctx-ctx, idx, ctx-ps + 
ctx-ps_shift);
+   r = atom_execute_table_locked(ctx-ctx, idx, ctx-ps + 
ctx-ps_shift);
+   if (r) {
+   ctx-abort = true;
+   }
 }
 
 static void atom_op_clear(atom_exec_context *ctx, int *ptr, int arg)
@@ -673,6 +680,8 @@ static void atom_op_eot(atom_exec_context *ctx, int *ptr, 
int arg)
 static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
 {
int execute = 0, target = U16(*ptr);
+   unsigned long cjiffies;
+
(*ptr) += 2;
switch (arg) {
case ATOM_COND_ABOVE:
@@ -700,8 +709,25 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, 
int arg)
if (arg != ATOM_COND_ALWAYS)
SDEBUG(   taken: %s\n, execute ? yes : no);
SDEBUG(   target: 0x%04X\n, target);
-   if (execute)
+   if (execute) {
+   if (ctx-last_jump == (ctx-start + target)) {
+   cjiffies = jiffies;
+   if (time_after(cjiffies, ctx-last_jump_jiffies)) {
+   cjiffies -= ctx-last_jump_jiffies;
+   if ((jiffies_to_msecs(cjiffies)  1000)) {
+   DRM_ERROR(atombios stuck in loop for 
more than 1sec aborting\n);
+   ctx-abort = true;
+   }
+   } else {
+   /* jiffies wrap around we will just wait a 
little longer */
+   ctx-last_jump_jiffies = jiffies;
+   }
+   } else {
+   ctx-last_jump = ctx-start + target;
+   ctx-last_jump_jiffies = jiffies;
+   }
*ptr = ctx-start + target;
+   }
 }
 
 static void atom_op_mask(atom_exec_context *ctx, int *ptr, int arg)
@@ -1104,7 +1130,7 @@ static struct {
atom_op_shr, ATOM_ARG_MC}, {
 atom_op_debug, 0},};
 
-static void atom_execute_table_locked(struct atom_context *ctx, int index, 
uint32_t * params)
+static int atom_execute_table_locked(struct atom_context *ctx, int index, 
uint32_t * params)
 {
int base = CU16(ctx-cmd_table + 4 + 2 * index);
int len, ws, ps, ptr;
@@ -1112,7 +1138,7 @@ static void atom_execute_table_locked(struct atom_context 
*ctx, int index, uint3
atom_exec_context ectx;
 
if (!base)
-   return;
+   return -EINVAL;
 
len = CU16(base + ATOM_CT_SIZE_PTR);
ws = CU8(base + ATOM_CT_WS_PTR);
@@ -1125,6 +1151,8 @@ static void atom_execute_table_locked(struct atom_context 
*ctx, int index, uint3
ectx.ps_shift = ps / 4;
ectx.start = base;
ectx.ps = params;
+   ectx.abort = false;
+   ectx.last_jump = 0;
if (ws)
ectx.ws = kzalloc(4 * ws, GFP_KERNEL);
else
@@ -1137,6 +1165,11 @@ static void atom_execute_table_locked(struct 
atom_context *ctx, int index, uint3
SDEBUG(%s @ 0x%04X\n

[PATCH 1/3] drm/radeon/kms: fence cleanup + more reliable GPU lockup detection V3

2010-03-02 Thread Jerome Glisse
This patch cleans up the fence code; it drops the timeout field of
the fence, as the time to complete each IB is unpredictable and
shouldn't be bound.

The fence cleanup leads to a GPU lockup detection improvement. This
patch introduces a callback allowing asic-specific tests for
lockup detection. In this patch the CP is used as a first indicator
of GPU lockup. If the CP doesn't make progress during 1 second we
assume we are facing a GPU lockup.

To avoid the overhead of testing for GPU lockup frequently, due to
fences taking time to be signaled, we query the lockup callback every
500msec. There are plenty of code comments explaining the design
choices inside the code.

This has been tested mostly on R3XX/R5XX hw; on a normally running
desktop (compiz, firefox, quake3 running) the lockup callback wasn't
called once (1 hour session). Also tested with forcing a GPU lockup,
and the lockup was reported after the 1s CP activity timeout.

V2 switch to a 500ms timeout so the GPU lockup callback gets called
   at least 2 times in less than 2sec.
V3 store the last jiffies in the fence struct so on ERESTART, EBUSY
   we keep track of how long we have already waited for a given fence

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/Makefile   |2 +-
 drivers/gpu/drm/radeon/evergreen.c|6 ++
 drivers/gpu/drm/radeon/r100.c |   84 ++
 drivers/gpu/drm/radeon/r300.c |   27 -
 drivers/gpu/drm/radeon/r600.c |   33 +-
 drivers/gpu/drm/radeon/radeon.h   |  104 +++--
 drivers/gpu/drm/radeon/radeon_asic.h  |   20 ++-
 drivers/gpu/drm/radeon/radeon_fence.c |  102 +---
 drivers/gpu/drm/radeon/rv770.c|6 --
 9 files changed, 277 insertions(+), 107 deletions(-)

diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 0adf49e..ed38262 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -63,6 +63,6 @@ radeon-y += radeon_device.o radeon_kms.o \
evergreen.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
-radeon-$(CONFIG_VGA_SWITCHEROO) += radone_atpx_handler.o
+radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
 
 obj-$(CONFIG_DRM_RADEON)+= radeon.o
diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index bd2e7aa..8988df7 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -490,6 +490,12 @@ int evergreen_mc_init(struct radeon_device *rdev)
return 0;
 }
 
+bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
+{
+   /* FIXME: implement for evergreen */
+   return false;
+}
+
 int evergreen_gpu_reset(struct radeon_device *rdev)
 {
/* FIXME: implement for evergreen */
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 91eb762..96a6370 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1772,6 +1772,90 @@ int r100_rb2d_reset(struct radeon_device *rdev)
return -1;
 }
 
+void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp 
*cp)
+{
+   lockup-last_cp_rptr = cp-rptr;
+   lockup-last_jiffies = jiffies;
+}
+
+/**
+ * r100_gpu_cp_is_lockup() - check if CP is lockup by recording information
+ * @rdev:  radeon device structure
+ * @lockup:r100_gpu_lockup structure holding CP lockup tracking 
informations
+ * @cp:radeon_cp structure holding CP information
+ *
+ * We don't need to initialize the lockup tracking information as we will 
either
+ * have CP rptr to a different value of jiffies wrap around which will force
+ * initialization of the lockup tracking informations.
+ *
+ * A possible false positivie is if we get call after while and last_cp_rptr ==
+ * the current CP rptr, even if it's unlikely it might happen. To avoid this
+ * if the elapsed time since last call is bigger than 2 second than we return
+ * false and update the tracking information. Due to this the caller must call
+ * r100_gpu_cp_is_lockup several time in less than 2sec for lockup to be 
reported
+ * the fencing code should be cautious about that.
+ *
+ * Caller should write to the ring to force CP to do something so we don't get
+ * false positive when CP is just gived nothing to do.
+ *
+ **/
+bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup 
*lockup, struct radeon_cp *cp)
+{
+   unsigned long cjiffies, elapsed;
+
+   cjiffies = jiffies;
+   if (!time_after(cjiffies, lockup-last_jiffies)) {
+   /* likely a wrap around */
+   lockup-last_jiffies = jiffies;
+   return false;
+   }
+   if (cp-rptr != lockup-last_cp_rptr) {
+   /* CP is still working no lockup */
+   lockup-last_cp_rptr = cp-rptr;
+   lockup-last_jiffies = jiffies;
+   return false;
+   }
+   elapsed = jiffies_to_msecs(cjiffies - lockup-last_jiffies

Re: [PATCH] drm/radeon/kms: fence cleanup + more reliable GPU lockup detection

2010-03-01 Thread Jerome Glisse
On Mon, Mar 01, 2010 at 04:00:02PM +1000, Dave Airlie wrote:
 On Mon, Mar 1, 2010 at 2:47 AM, Jerome Glisse jgli...@redhat.com wrote:
  On Sun, Feb 28, 2010 at 12:22:52PM +, Alan Swanson wrote:
  On Fri, 2010-02-26 at 15:49 +0100, Jerome Glisse wrote:
   This patch cleanup the fence code, it drops the timeout field of
   fence as the time to complete each IB is unpredictable and shouldn't
   be bound.
  
   The fence cleanup lead to GPU lockup detection improvement, this
   patch introduce a callback, allowing to do asic specific test for
   lockup detection. In this patch the CP is use as a first indicator
   of GPU lockup. If CP doesn't make progress during 1second we assume
   we are facing a GPU lockup.
  
   To avoid overhead of testing GPU lockup frequently due to fence
   taking time to be signaled we query the lockup callback every
   100msec. There is plenty code comment explaining the design  choise
   inside the code.
 
  Every 100msec? Is this running all the time? If so, that's not very good
  for CPU power saving to lower C-states in an idle system. We could at
  least use one of the round_jiffies.
 
 
  This run only when userspace call bo wait thus it only happen when userspace
  is waiting for something.
 
 Why not just test when the old timeout code used to test? every second or so?
 
 I'm not sure why with the old code instead of assuming a fence timeout implied
 a lockup you didn't just change it to test if it was a real lockup and
 continue waiting
 if the GPU was making progress. This seems simpler, though maybe the cleanups
 are worth it.
 
 Dave.
 

The old code was misleading: we didn't test every second or so, it
depended on the fence timeout, and it was quite a small amount
of time — I don't remember offhand. Here on a fast r7xx with 2 quake3,
a bunch of gears and compiz, the average time for a fence is 20ms.
But I think I will switch back to a half-second timeout; some irq
will likely wake us up.

Note that if you remove the comment lines in my patch, I am pretty
sure my patch is removing code rather than adding some :) I will
do a V3 today.

Cheers,
Jerome

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: Convert DRM_INFO/DRM_ERROR to dev_info/dev_err

2010-03-01 Thread Jerome Glisse
On Mon, Mar 01, 2010 at 03:47:35PM +1000, Dave Airlie wrote:
 On Fri, Feb 26, 2010 at 4:56 AM, Jerome Glisse gli...@freedesktop.org wrote:
  Attached is conversion from DRM_INFO/DRM_ERROR to dev_info/dev_err
  to apply it copy all doconv* file into the radeon subfolder of the
  kernel run ./doconv.sh and then apply the 0001 patch which fix
  compilation after conversion (place where struct radeon_device is
  missing) then thing should compile
 
  I think it's worthwhile cleanup especialy on multi GPU configuration.
 
 Does this not remove the drm log levels?
 
 so we end up with everything in dmesg the whole time?
 
 that seems wrong, I'd rather pass a dev to DRM_ERROR/DRM_INFO
 or maybe defined DRM_DEV_ERROR, DRM_DEV_INFO.
 
 Dave.


This doesn't touch DRM_DEBUG, so the log level is unaffected.
But if you prefer having DRM_DEV*, I could convert radeon
to that.

Cheers,
Jerome

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 2/2] drm/ttm: don't write to bo-reserved without holding glob-lru_lock

2010-03-01 Thread Jerome Glisse
On Mon, Mar 01, 2010 at 07:34:40PM +0100, Maarten Maathuis wrote:
 - The headerfile says you can't write to it without holding the lock.
 
 Signed-off-by: Maarten Maathuis madman2...@gmail.com

NAK, from my POV as we always use atomic_* on reserved it's useless
to protect it with spinlock.

Cheers,
Jerome

 ---
  drivers/gpu/drm/ttm/ttm_bo.c |8 +++-
  1 files changed, 7 insertions(+), 1 deletions(-)
 
 diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
 index f5333d9..2104885 100644
 --- a/drivers/gpu/drm/ttm/ttm_bo.c
 +++ b/drivers/gpu/drm/ttm/ttm_bo.c
 @@ -476,9 +476,9 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object 
 *bo, bool remove_all)
   drm_mm_put_block(bo-mem.mm_node);
   bo-mem.mm_node = NULL;
   }
 - spin_unlock(glob-lru_lock);
  
   atomic_set(bo-reserved, 0);
 + spin_unlock(glob-lru_lock);
  
   while (put_count--)
   kref_put(bo-list_kref, ttm_bo_ref_bug);
 @@ -1707,8 +1707,12 @@ EXPORT_SYMBOL(ttm_bo_wait);
  
  void ttm_bo_unblock_reservation(struct ttm_buffer_object *bo)
  {
 + struct ttm_bo_global *glob = bo-glob;
 +
 + spin_lock(glob-lru_lock);
   atomic_set(bo-reserved, 0);
   wake_up_all(bo-event_queue);
 + spin_unlock(glob-lru_lock);
  }
  
  int ttm_bo_block_reservation(struct ttm_buffer_object *bo, bool 
 interruptible,
 @@ -1849,8 +1853,10 @@ out:
* already swapped buffer.
*/
  
 + spin_lock(glob-lru_lock);
   atomic_set(bo-reserved, 0);
   wake_up_all(bo-event_queue);
 + spin_unlock(glob-lru_lock);
   kref_put(bo-list_kref, ttm_bo_release_list);
   return ret;
  }
 -- 
 1.7.0
 
 
 --
 Download Intel#174; Parallel Studio Eval
 Try the new software tools for yourself. Speed compiling, find bugs
 proactively, and fine-tune applications for parallel performance.
 See why Intel Parallel Studio got high marks during beta.
 http://p.sf.net/sfu/intel-sw-dev
 --
 ___
 Dri-devel mailing list
 Dri-devel@lists.sourceforge.net
 https://lists.sourceforge.net/lists/listinfo/dri-devel
 

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 1/2] drm/ttm: remove some bo-mutex remains

2010-03-01 Thread Jerome Glisse
On Mon, Mar 01, 2010 at 07:34:39PM +0100, Maarten Maathuis wrote:
 - A few comments existed here and there that referred to a bo-mutex.
 
 Signed-off-by: Maarten Maathuis madman2...@gmail.com

Reviewed-by: Jerome Glisse jgli...@redhat.com

 ---
  drivers/gpu/drm/ttm/ttm_bo.c|6 +-
  drivers/gpu/drm/ttm/ttm_bo_vm.c |2 +-
  include/drm/ttm/ttm_bo_api.h|3 +--
  include/drm/ttm/ttm_bo_driver.h |1 +
  4 files changed, 4 insertions(+), 8 deletions(-)
 
 diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
 index 2920f9a..f5333d9 100644
 --- a/drivers/gpu/drm/ttm/ttm_bo.c
 +++ b/drivers/gpu/drm/ttm/ttm_bo.c
 @@ -46,7 +46,6 @@
  #include linux/file.h
  #include linux/module.h
  
 -#define TTM_ASSERT_LOCKED(param)
  #define TTM_DEBUG(fmt, arg...)
  #define TTM_BO_HASH_ORDER 13
  
 @@ -306,9 +305,6 @@ void ttm_bo_unreserve(struct ttm_buffer_object *bo)
  }
  EXPORT_SYMBOL(ttm_bo_unreserve);
  
 -/*
 - * Call bo-mutex locked.
 - */
  static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
  {
   struct ttm_bo_device *bdev = bo-bdev;
 @@ -316,7 +312,7 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
 bool zero_alloc)
   int ret = 0;
   uint32_t page_flags = 0;
  
 - TTM_ASSERT_LOCKED(bo-mutex);
 + BUG_ON(!atomic_read(bo-reserved));
   bo-ttm = NULL;
  
   if (bdev-need_dma32)
 diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
 index 668dbe8..41b0c1e 100644
 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
 +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
 @@ -146,7 +146,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, 
 struct vm_fault *vmf)
* since the mmap_sem is only held in read mode. However, we
* modify only the caching bits of vma-vm_page_prot and
* consider those bits protected by
 -  * the bo-mutex, as we should be the only writers.
 +  * bo-reserved, as we should be the only writers.
* There shouldn't really be any readers of these bits except
* within vm_insert_mixed()? fork?
*
 diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
 index 81eb9f4..c1093ae 100644
 --- a/include/drm/ttm/ttm_bo_api.h
 +++ b/include/drm/ttm/ttm_bo_api.h
 @@ -35,7 +35,6 @@
  #include linux/kref.h
  #include linux/list.h
  #include linux/wait.h
 -#include linux/mutex.h
  #include linux/mm.h
  #include linux/rbtree.h
  #include linux/bitmap.h
 @@ -298,7 +297,7 @@ ttm_bo_reference(struct ttm_buffer_object *bo)
   * @interruptible:  Use interruptible wait.
   * @no_wait:  Return immediately if buffer is busy.
   *
 - * This function must be called with the bo::mutex held, and makes
 + * This function must be called with bo-reserved held, and makes
   * sure any previous rendering to the buffer is completed.
   * Note: It might be necessary to block validations before the
   * wait by reserving the buffer.
 diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
 index ff7664e..d3fc5f8 100644
 --- a/include/drm/ttm/ttm_bo_driver.h
 +++ b/include/drm/ttm/ttm_bo_driver.h
 @@ -37,6 +37,7 @@
  #include linux/workqueue.h
  #include linux/fs.h
  #include linux/spinlock.h
 +#include linux/mutex.h
  
  struct ttm_backend;
  
 -- 
 1.7.0
 
 
 --
 Download Intel® Parallel Studio Eval
 Try the new software tools for yourself. Speed compiling, find bugs
 proactively, and fine-tune applications for parallel performance.
 See why Intel Parallel Studio got high marks during beta.
 http://p.sf.net/sfu/intel-sw-dev
 --
 ___
 Dri-devel mailing list
 Dri-devel@lists.sourceforge.net
 https://lists.sourceforge.net/lists/listinfo/dri-devel
 

--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH] drm/radeon/kms: fence cleanup + more reliable GPU lockup detection

2010-02-28 Thread Jerome Glisse
On Sun, Feb 28, 2010 at 12:22:52PM +, Alan Swanson wrote:
 On Fri, 2010-02-26 at 15:49 +0100, Jerome Glisse wrote:
  This patch cleanup the fence code, it drops the timeout field of
  fence as the time to complete each IB is unpredictable and shouldn't
  be bound.
  
  The fence cleanup lead to GPU lockup detection improvement, this
  patch introduce a callback, allowing to do asic specific test for
  lockup detection. In this patch the CP is use as a first indicator
  of GPU lockup. If CP doesn't make progress during 1second we assume
  we are facing a GPU lockup.
  
  To avoid the overhead of testing for GPU lockup frequently due to a fence
  taking time to be signaled, we query the lockup callback every
  100msec. There are plenty of code comments explaining the design choice
  inside the code.
 
 Every 100msec? Is this running all the time? If so, that's not very good
 for CPU power saving to lower C-states in an idle system. We could at
 least use one of the round_jiffies.
 

This runs only when userspace calls bo wait, thus it only happens when userspace
is waiting for something.

Cheers,
Jerome

--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 1/2] drm/radeon/kms: rename gpu_reset to asic_reset V2

2010-02-27 Thread Jerome Glisse
On Fri, Feb 26, 2010 at 10:33:34PM +0100, Jerome Glisse wrote:
 Patch rename gpu_reset to asic_reset in prevision of having
 gpu_reset doing more stuff than just basic asic reset.
 
 V2 store the last time we had new fence in the fence driver
 so on EBUSY/ERESTART we still keep accurate timing information
 on how long we have been waiting for a given fence
 
 Signed-off-by: Jerome Glisse jgli...@redhat.com


Ignore this patch, I messed up my git history, sigh.

Jerome

--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH] drm/radeon/kms: fence cleanup + more reliable GPU lockup detection

2010-02-26 Thread Jerome Glisse
This patch cleans up the fence code; it drops the timeout field of
fence as the time to complete each IB is unpredictable and shouldn't
be bound.

The fence cleanup leads to a GPU lockup detection improvement; this
patch introduces a callback, allowing asic-specific tests for
lockup detection. In this patch the CP is used as a first indicator
of GPU lockup. If the CP doesn't make progress during 1 second we assume
we are facing a GPU lockup.

To avoid the overhead of testing for GPU lockup frequently due to a fence
taking time to be signaled, we query the lockup callback every
100msec. There are plenty of code comments explaining the design choice
inside the code.

This has been tested mostly on R3XX/R5XX hw; in a normal running
desktop (compiz, firefox, quake3 running) the lockup callback wasn't
called once (1 hour session). Also tested by forcing a GPU lockup, and the
lockup was reported after the 1s CP activity timeout.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/evergreen.c|6 ++
 drivers/gpu/drm/radeon/r100.c |   84 +++
 drivers/gpu/drm/radeon/r300.c |   27 -
 drivers/gpu/drm/radeon/r600.c |   33 +-
 drivers/gpu/drm/radeon/radeon.h   |  101 ++--
 drivers/gpu/drm/radeon/radeon_asic.h  |   20 ++-
 drivers/gpu/drm/radeon/radeon_fence.c |   87 ++--
 drivers/gpu/drm/radeon/rv770.c|6 --
 8 files changed, 262 insertions(+), 102 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index b8cd119..11688e2 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -485,6 +485,12 @@ int evergreen_mc_init(struct radeon_device *rdev)
return 0;
 }
 
+bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
+{
+   /* FIXME: implement for evergreen */
+   return false;
+}
+
 int evergreen_gpu_reset(struct radeon_device *rdev)
 {
/* FIXME: implement for evergreen */
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 932ce24..a77e754 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1780,6 +1780,90 @@ int r100_rb2d_reset(struct radeon_device *rdev)
return -1;
 }
 
+void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp 
*cp)
+{
+   lockup-last_cp_rptr = cp-rptr;
+   lockup-last_jiffies = jiffies;
+}
+
+/**
+ * r100_gpu_cp_is_lockup() - check if CP is lockup by recording information
+ * @rdev:  radeon device structure
+ * @lockup:r100_gpu_lockup structure holding CP lockup tracking 
informations
+ * @cp:radeon_cp structure holding CP information
+ *
+ * We don't need to initialize the lockup tracking information as we will 
either
+ * have CP rptr to a different value of jiffies wrap around which will force
+ * initialization of the lockup tracking informations.
+ *
+ * A possible false positivie is if we get call after while and last_cp_rptr ==
+ * the current CP rptr, even if it's unlikely it might happen. To avoid this
+ * if the elapsed time since last call is bigger than 2 second than we return
+ * false and update the tracking information. Due to this the caller must call
+ * r100_gpu_cp_is_lockup several time in less than 2sec for lockup to be 
reported
+ * the fencing code should be cautious about that.
+ *
+ * Caller should write to the ring to force CP to do something so we don't get
+ * false positive when CP is just gived nothing to do.
+ *
+ **/
+bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup 
*lockup, struct radeon_cp *cp)
+{
+   unsigned long cjiffies, elapsed;
+
+   cjiffies = jiffies;
+   if (!time_after(cjiffies, lockup-last_jiffies)) {
+   /* likely a wrap around */
+   lockup-last_jiffies = jiffies;
+   return false;
+   }
+   if (cp-rptr != lockup-last_cp_rptr) {
+   /* CP is still working no lockup */
+   lockup-last_cp_rptr = cp-rptr;
+   lockup-last_jiffies = jiffies;
+   return false;
+   }
+   elapsed = jiffies_to_msecs(cjiffies - lockup-last_jiffies);
+   if (elapsed = 2000) {
+   /* very likely the improbable case where current
+* rptr is equal to last recorded, a while ago, rptr
+* this is more likely a false positive update tracking
+* information which should force us to be recall at
+* latter point
+*/
+   lockup-last_cp_rptr = cp-rptr;
+   lockup-last_jiffies = jiffies;
+   return false;
+   }
+   if (elapsed = 1000) {
+   dev_err(rdev-dev, GPU lockup CP stall for more than 
%lumsec\n, elapsed);
+   return true;
+   }
+   /* give a chance to the GPU ... */
+   return false;
+}
+
+bool r100_gpu_is_lockup

[PATCH] drm/radeon/kms: initialize set_surface_reg reg for rs600 asic

2010-02-26 Thread Jerome Glisse
rs600 asic was missing set_surface_reg callback leading to
oops.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/radeon_asic.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_asic.h 
b/drivers/gpu/drm/radeon/radeon_asic.h
index 4572a66..d3a157b 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -407,6 +407,8 @@ static struct radeon_asic rs600_asic = {
.get_pcie_lanes = NULL,
.set_pcie_lanes = NULL,
.set_clock_gating = radeon_atom_set_clock_gating,
+   .set_surface_reg = r100_set_surface_reg,
+   .clear_surface_reg = r100_clear_surface_reg,
.bandwidth_update = rs600_bandwidth_update,
.hpd_init = rs600_hpd_init,
.hpd_fini = rs600_hpd_fini,
-- 
1.6.6


--
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


[PATCH 1/2] drm/radeon/kms: rename gpu_reset to asic_reset V2

2010-02-26 Thread Jerome Glisse
This patch renames gpu_reset to asic_reset in anticipation of having
gpu_reset do more stuff than just a basic asic reset.

V2 store the last time we had new fence in the fence driver
so on EBUSY/ERESTART we still keep accurate timing information
on how long we have been waiting for a given fence

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/evergreen.c |2 +-
 drivers/gpu/drm/radeon/r100.c  |6 ++--
 drivers/gpu/drm/radeon/r300.c  |6 ++--
 drivers/gpu/drm/radeon/r420.c  |4 +-
 drivers/gpu/drm/radeon/r520.c  |4 +-
 drivers/gpu/drm/radeon/r600.c  |2 +-
 drivers/gpu/drm/radeon/radeon.h|8 +++---
 drivers/gpu/drm/radeon/radeon_asic.h   |   36 
 drivers/gpu/drm/radeon/radeon_device.c |2 +-
 drivers/gpu/drm/radeon/radeon_fence.c  |   23 
 drivers/gpu/drm/radeon/rs400.c |4 +-
 drivers/gpu/drm/radeon/rs600.c |4 +-
 drivers/gpu/drm/radeon/rs690.c |4 +-
 drivers/gpu/drm/radeon/rv515.c |8 +++---
 14 files changed, 59 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 11688e2..748c58a 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -491,7 +491,7 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
return false;
 }
 
-int evergreen_gpu_reset(struct radeon_device *rdev)
+int evergreen_asic_reset(struct radeon_device *rdev)
 {
/* FIXME: implement for evergreen */
return 0;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index a77e754..029c55d 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1864,7 +1864,7 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev)
return r100_gpu_cp_is_lockup(rdev, rdev-config.r100.lockup, 
rdev-cp);
 }
 
-int r100_gpu_reset(struct radeon_device *rdev)
+int r100_asic_reset(struct radeon_device *rdev)
 {
uint32_t status;
 
@@ -3503,7 +3503,7 @@ int r100_resume(struct radeon_device *rdev)
/* Resume clock before doing reset */
r100_clock_startup(rdev);
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev, GPU reset failed ! (0xE40=0x%08X, 
0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
RREG32(R_0007C0_CP_STAT));
@@ -3571,7 +3571,7 @@ int r100_init(struct radeon_device *rdev)
return r;
}
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev,
GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 2c5d272..f033562 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -446,7 +446,7 @@ bool r300_gpu_is_lockup(struct radeon_device *rdev)
return r100_gpu_cp_is_lockup(rdev, rdev-config.r300.lockup, 
rdev-cp);
 }
 
-int r300_gpu_reset(struct radeon_device *rdev)
+int r300_asic_reset(struct radeon_device *rdev)
 {
uint32_t status;
 
@@ -1326,7 +1326,7 @@ int r300_resume(struct radeon_device *rdev)
/* Resume clock before doing reset */
r300_clock_startup(rdev);
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev, GPU reset failed ! (0xE40=0x%08X, 
0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
RREG32(R_0007C0_CP_STAT));
@@ -1396,7 +1396,7 @@ int r300_init(struct radeon_device *rdev)
return r;
}
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev,
GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n,
RREG32(R_000E40_RBBM_STATUS),
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index e0eb9b6..8e6d852 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -233,7 +233,7 @@ int r420_resume(struct radeon_device *rdev)
/* Resume clock before doing reset */
r420_clock_resume(rdev);
/* Reset gpu before posting otherwise ATOM will enter infinite loop */
-   if (radeon_gpu_reset(rdev)) {
+   if (radeon_asic_reset(rdev)) {
dev_warn(rdev-dev, GPU reset failed ! (0xE40=0x%08X, 
0x7C0=0x%08X)\n,
RREG32

  1   2   3   4   5   6   7   8   >