On Thu, May 24, 2012 at 09:49:07AM +0200, Christian König wrote:
> Move inter ring syncing with semaphores into the
> existing ring allocations, with that we need to
> lock the ring mutex only once.
> 
> Signed-off-by: Christian König <deathsim...@vodafone.de>

Reviewed-by: Jerome Glisse <jgli...@redhat.com>

> ---
>  drivers/gpu/drm/radeon/evergreen_blit_kms.c |    3 +-
>  drivers/gpu/drm/radeon/r600.c               |    5 +-
>  drivers/gpu/drm/radeon/r600_blit_kms.c      |   24 +++++++--
>  drivers/gpu/drm/radeon/radeon.h             |    6 +--
>  drivers/gpu/drm/radeon/radeon_asic.h        |    5 +-
>  drivers/gpu/drm/radeon/radeon_cs.c          |   38 +++-----------
>  drivers/gpu/drm/radeon/radeon_ring.c        |   30 +++++++++--
>  drivers/gpu/drm/radeon/radeon_semaphore.c   |   71 
> ++++++++++-----------------
>  drivers/gpu/drm/radeon/radeon_test.c        |    6 +--
>  drivers/gpu/drm/radeon/radeon_ttm.c         |   20 --------
>  10 files changed, 92 insertions(+), 116 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c 
> b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
> index 1e96bd4..e512560 100644
> --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
> +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
> @@ -622,7 +622,8 @@ int evergreen_blit_init(struct radeon_device *rdev)
>       rdev->r600_blit.primitives.draw_auto = draw_auto;
>       rdev->r600_blit.primitives.set_default_state = set_default_state;
>  
> -     rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
> +     rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
> +     rdev->r600_blit.ring_size_common += 55; /* shaders + def state */
>       rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
>       rdev->r600_blit.ring_size_common += 5; /* done copy */
>       rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
> diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
> index e5279f9..a8d8c44 100644
> --- a/drivers/gpu/drm/radeon/r600.c
> +++ b/drivers/gpu/drm/radeon/r600.c
> @@ -2371,15 +2371,16 @@ int r600_copy_blit(struct radeon_device *rdev,
>                  unsigned num_gpu_pages,
>                  struct radeon_fence **fence)
>  {
> +     struct radeon_semaphore *sem = NULL;
>       struct radeon_sa_bo *vb = NULL;
>       int r;
>  
> -     r = r600_blit_prepare_copy(rdev, num_gpu_pages, &vb);
> +     r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem);
>       if (r) {
>               return r;
>       }
>       r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb);
> -     r600_blit_done_copy(rdev, fence, vb);
> +     r600_blit_done_copy(rdev, fence, vb, sem);
>       return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c 
> b/drivers/gpu/drm/radeon/r600_blit_kms.c
> index 02f4eeb..2b8d641 100644
> --- a/drivers/gpu/drm/radeon/r600_blit_kms.c
> +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
> @@ -512,7 +512,8 @@ int r600_blit_init(struct radeon_device *rdev)
>       rdev->r600_blit.primitives.draw_auto = draw_auto;
>       rdev->r600_blit.primitives.set_default_state = set_default_state;
>  
> -     rdev->r600_blit.ring_size_common = 40; /* shaders + def state */
> +     rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
> +     rdev->r600_blit.ring_size_common += 40; /* shaders + def state */
>       rdev->r600_blit.ring_size_common += 5; /* done copy */
>       rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
>  
> @@ -666,7 +667,8 @@ static unsigned r600_blit_create_rect(unsigned 
> num_gpu_pages,
>  
>  
>  int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned 
> num_gpu_pages,
> -                        struct radeon_sa_bo **vb)
> +                        struct radeon_fence **fence, struct radeon_sa_bo 
> **vb,
> +                        struct radeon_semaphore **sem)
>  {
>       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
>       int r;
> @@ -689,22 +691,37 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, 
> unsigned num_gpu_pages,
>               return r;
>       }
>  
> +     r = radeon_semaphore_create(rdev, sem);
> +     if (r) {
> +             radeon_sa_bo_free(rdev, vb, NULL);
> +             return r;
> +     }
> +
>       /* calculate number of loops correctly */
>       ring_size = num_loops * dwords_per_loop;
>       ring_size += rdev->r600_blit.ring_size_common;
>       r = radeon_ring_lock(rdev, ring, ring_size);
>       if (r) {
>               radeon_sa_bo_free(rdev, vb, NULL);
> +             radeon_semaphore_free(rdev, sem, NULL);
>               return r;
>       }
>  
> +     if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) {
> +             radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring,
> +                                         RADEON_RING_TYPE_GFX_INDEX);
> +             radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX);
> +     } else {
> +             radeon_semaphore_free(rdev, sem, NULL);
> +     }
> +
>       rdev->r600_blit.primitives.set_default_state(rdev);
>       rdev->r600_blit.primitives.set_shaders(rdev);
>       return 0;
>  }
>  
>  void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence 
> **fence,
> -                      struct radeon_sa_bo *vb)
> +                      struct radeon_sa_bo *vb, struct radeon_semaphore *sem)
>  {
>       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
>       int r;
> @@ -717,6 +734,7 @@ void r600_blit_done_copy(struct radeon_device *rdev, 
> struct radeon_fence **fence
>  
>       radeon_ring_unlock_commit(rdev, ring);
>       radeon_sa_bo_free(rdev, &vb, *fence);
> +     radeon_semaphore_free(rdev, &sem, *fence);
>  }
>  
>  void r600_kms_blit_copy(struct radeon_device *rdev,
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 4e232c3..aebaf28 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -465,10 +465,9 @@ void radeon_semaphore_emit_wait(struct radeon_device 
> *rdev, int ring,
>                               struct radeon_semaphore *semaphore);
>  int radeon_semaphore_sync_rings(struct radeon_device *rdev,
>                               struct radeon_semaphore *semaphore,
> -                             bool sync_to[RADEON_NUM_RINGS],
> -                             int dst_ring);
> +                             int signaler, int waiter);
>  void radeon_semaphore_free(struct radeon_device *rdev,
> -                        struct radeon_semaphore *semaphore,
> +                        struct radeon_semaphore **semaphore,
>                          struct radeon_fence *fence);
>  
>  /*
> @@ -648,6 +647,7 @@ struct radeon_ib {
>       struct radeon_fence             *fence;
>       unsigned                        vm_id;
>       bool                            is_const_ib;
> +     struct radeon_fence             *sync_to[RADEON_NUM_RINGS];
>       struct radeon_semaphore         *semaphore;
>  };
>  
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.h 
> b/drivers/gpu/drm/radeon/radeon_asic.h
> index 8cdf075..94c427a 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.h
> +++ b/drivers/gpu/drm/radeon/radeon_asic.h
> @@ -363,9 +363,10 @@ int r600_hdmi_buffer_status_changed(struct drm_encoder 
> *encoder);
>  void r600_hdmi_update_audio_settings(struct drm_encoder *encoder);
>  /* r600 blit */
>  int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned 
> num_gpu_pages,
> -                        struct radeon_sa_bo **vb);
> +                        struct radeon_fence **fence, struct radeon_sa_bo 
> **vb,
> +                        struct radeon_semaphore **sem);
>  void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence 
> **fence,
> -                      struct radeon_sa_bo *vb);
> +                      struct radeon_sa_bo *vb, struct radeon_semaphore *sem);
>  void r600_kms_blit_copy(struct radeon_device *rdev,
>                       u64 src_gpu_addr, u64 dst_gpu_addr,
>                       unsigned num_gpu_pages,
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
> b/drivers/gpu/drm/radeon/radeon_cs.c
> index c7d64a7..d295821 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -115,36 +115,20 @@ static int radeon_cs_get_ring(struct radeon_cs_parser 
> *p, u32 ring, s32 priority
>       return 0;
>  }
>  
> -static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
> +static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
>  {
> -     bool sync_to_ring[RADEON_NUM_RINGS] = { };
> -     bool need_sync = false;
> -     int i, r;
> +     int i;
>  
>       for (i = 0; i < p->nrelocs; i++) {
> -             struct radeon_fence *fence;
> +             struct radeon_fence *a, *b;
>  
>               if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj)
>                       continue;
>  
> -             fence = p->relocs[i].robj->tbo.sync_obj;
> -             if (fence->ring != p->ring && !radeon_fence_signaled(fence)) {
> -                     sync_to_ring[fence->ring] = true;
> -                     need_sync = true;
> -             }
> -     }
> -
> -     if (!need_sync) {
> -             return 0;
> -     }
> -
> -     r = radeon_semaphore_create(p->rdev, &p->ib.semaphore);
> -     if (r) {
> -             return r;
> +             a = p->relocs[i].robj->tbo.sync_obj;
> +             b = p->ib.sync_to[a->ring];
> +             p->ib.sync_to[a->ring] = radeon_fence_later(a, b);
>       }
> -
> -     return radeon_semaphore_sync_rings(p->rdev, p->ib.semaphore,
> -                                        sync_to_ring, p->ring);
>  }
>  
>  int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
> @@ -365,10 +349,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
>               DRM_ERROR("Invalid command stream !\n");
>               return r;
>       }
> -     r = radeon_cs_sync_rings(parser);
> -     if (r) {
> -             DRM_ERROR("Failed to synchronize rings !\n");
> -     }
> +     radeon_cs_sync_rings(parser);
>       parser->ib.vm_id = 0;
>       r = radeon_ib_schedule(rdev, &parser->ib);
>       if (r) {
> @@ -465,10 +446,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device 
> *rdev,
>       if (r) {
>               goto out;
>       }
> -     r = radeon_cs_sync_rings(parser);
> -     if (r) {
> -             DRM_ERROR("Failed to synchronize rings !\n");
> -     }
> +     radeon_cs_sync_rings(parser);
>  
>       if ((rdev->family >= CHIP_TAHITI) &&
>           (parser->chunk_const_ib_idx != -1)) {
> diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
> b/drivers/gpu/drm/radeon/radeon_ring.c
> index a0b9970..87f66c5 100644
> --- a/drivers/gpu/drm/radeon/radeon_ring.c
> +++ b/drivers/gpu/drm/radeon/radeon_ring.c
> @@ -67,7 +67,7 @@ u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
>  int radeon_ib_get(struct radeon_device *rdev, int ring,
>                 struct radeon_ib *ib, unsigned size)
>  {
> -     int r;
> +     int i, r;
>  
>       r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, 
> true);
>       if (r) {
> @@ -75,20 +75,26 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
>               return r;
>       }
>  
> +     r = radeon_semaphore_create(rdev, &ib->semaphore);
> +     if (r) {
> +             return r;
> +     }
> +
>       ib->ring = ring;
>       ib->fence = NULL;
>       ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo);
>       ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo);
>       ib->vm_id = 0;
>       ib->is_const_ib = false;
> -     ib->semaphore = NULL;
> +     for (i = 0; i < RADEON_NUM_RINGS; ++i)
> +             ib->sync_to[i] = NULL;
>  
>       return 0;
>  }
>  
>  void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
>  {
> -     radeon_semaphore_free(rdev, ib->semaphore, ib->fence);
> +     radeon_semaphore_free(rdev, &ib->semaphore, ib->fence);
>       radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence);
>       radeon_fence_unref(&ib->fence);
>  }
> @@ -96,7 +102,8 @@ void radeon_ib_free(struct radeon_device *rdev, struct 
> radeon_ib *ib)
>  int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
>  {
>       struct radeon_ring *ring = &rdev->ring[ib->ring];
> -     int r = 0;
> +     bool need_sync = false;
> +     int i, r = 0;
>  
>       if (!ib->length_dw || !ring->ready) {
>               /* TODO: Nothings in the ib we should report. */
> @@ -105,11 +112,24 @@ int radeon_ib_schedule(struct radeon_device *rdev, 
> struct radeon_ib *ib)
>       }
>  
>       /* 64 dwords should be enough for fence too */
> -     r = radeon_ring_lock(rdev, ring, 64);
> +     r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8);
>       if (r) {
>               dev_err(rdev->dev, "scheduling IB failed (%d).\n", r);
>               return r;
>       }
> +     for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> +             struct radeon_fence *fence = ib->sync_to[i];
> +             if (radeon_fence_need_sync(fence, ib->ring)) {
> +                     need_sync = true;
> +                     radeon_semaphore_sync_rings(rdev, ib->semaphore,
> +                                                 fence->ring, ib->ring);
> +                     radeon_fence_note_sync(fence, ib->ring);
> +             }
> +     }
> +     /* immediately free semaphore when we don't need to sync */
> +     if (!need_sync) {
> +             radeon_semaphore_free(rdev, &ib->semaphore, NULL);
> +     }
>       radeon_ring_ib_execute(rdev, ib->ring, ib);
>       r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
>       if (r) {
> diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c 
> b/drivers/gpu/drm/radeon/radeon_semaphore.c
> index e2ace5d..7cc78de 100644
> --- a/drivers/gpu/drm/radeon/radeon_semaphore.c
> +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
> @@ -68,70 +68,49 @@ void radeon_semaphore_emit_wait(struct radeon_device 
> *rdev, int ring,
>       radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, 
> true);
>  }
>  
> +/* caller must hold ring lock */
>  int radeon_semaphore_sync_rings(struct radeon_device *rdev,
>                               struct radeon_semaphore *semaphore,
> -                             bool sync_to[RADEON_NUM_RINGS],
> -                             int dst_ring)
> +                             int signaler, int waiter)
>  {
> -     int i = 0, r;
> +     int r;
>  
> -     mutex_lock(&rdev->ring_lock);
> -     r = radeon_ring_alloc(rdev, &rdev->ring[dst_ring], RADEON_NUM_RINGS * 
> 8);
> -     if (r) {
> -             goto error;
> +     /* no need to signal and wait on the same ring */
> +     if (signaler == waiter) {
> +             return 0;
>       }
>  
> -     for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> -             /* no need to sync to our own or unused rings */
> -             if (!sync_to[i] || i == dst_ring)
> -                     continue;
> -
> -             /* prevent GPU deadlocks */
> -             if (!rdev->ring[i].ready) {
> -                     dev_err(rdev->dev, "Trying to sync to a disabled 
> ring!");
> -                     r = -EINVAL;
> -                     goto error;
> -             }
> -
> -             r = radeon_ring_alloc(rdev, &rdev->ring[i], 8);
> -             if (r) {
> -                     goto error;
> -             }
> -
> -             radeon_semaphore_emit_signal(rdev, i, semaphore);
> -             radeon_semaphore_emit_wait(rdev, dst_ring, semaphore);
> +     /* prevent GPU deadlocks */
> +     if (!rdev->ring[signaler].ready) {
> +             dev_err(rdev->dev, "Trying to sync to a disabled ring!");
> +             return -EINVAL;
> +     }
>  
> -             radeon_ring_commit(rdev, &rdev->ring[i]);
> +     r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8);
> +     if (r) {
> +             return r;
>       }
> +     radeon_semaphore_emit_signal(rdev, signaler, semaphore);
> +     radeon_ring_commit(rdev, &rdev->ring[signaler]);
>  
> -     radeon_ring_commit(rdev, &rdev->ring[dst_ring]);
> -     mutex_unlock(&rdev->ring_lock);
> +     /* we assume caller has already allocated space on waiters ring */
> +     radeon_semaphore_emit_wait(rdev, waiter, semaphore);
>  
>       return 0;
> -
> -error:
> -     /* unlock all locks taken so far */
> -     for (--i; i >= 0; --i) {
> -             if (sync_to[i] || i == dst_ring) {
> -                     radeon_ring_undo(&rdev->ring[i]);
> -             }
> -     }
> -     radeon_ring_undo(&rdev->ring[dst_ring]);
> -     mutex_unlock(&rdev->ring_lock);
> -     return r;
>  }
>  
>  void radeon_semaphore_free(struct radeon_device *rdev,
> -                        struct radeon_semaphore *semaphore,
> +                        struct radeon_semaphore **semaphore,
>                          struct radeon_fence *fence)
>  {
> -     if (semaphore == NULL) {
> +     if (semaphore == NULL || *semaphore == NULL) {
>               return;
>       }
> -     if (semaphore->waiters > 0) {
> +     if ((*semaphore)->waiters > 0) {
>               dev_err(rdev->dev, "semaphore %p has more waiters than 
> signalers,"
> -                     " hardware lockup imminent!\n", semaphore);
> +                     " hardware lockup imminent!\n", *semaphore);
>       }
> -     radeon_sa_bo_free(rdev, &semaphore->sa_bo, fence);
> -     kfree(semaphore);
> +     radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence);
> +     kfree(*semaphore);
> +     *semaphore = NULL;
>  }
> diff --git a/drivers/gpu/drm/radeon/radeon_test.c 
> b/drivers/gpu/drm/radeon/radeon_test.c
> index 47e1535..a94f66f 100644
> --- a/drivers/gpu/drm/radeon/radeon_test.c
> +++ b/drivers/gpu/drm/radeon/radeon_test.c
> @@ -303,8 +303,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
>       }
>  
>  out_cleanup:
> -     if (semaphore)
> -             radeon_semaphore_free(rdev, semaphore, NULL);
> +     radeon_semaphore_free(rdev, &semaphore, NULL);
>  
>       if (fence1)
>               radeon_fence_unref(&fence1);
> @@ -422,8 +421,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,
>       }
>  
>  out_cleanup:
> -     if (semaphore)
> -             radeon_semaphore_free(rdev, semaphore, NULL);
> +     radeon_semaphore_free(rdev, &semaphore, NULL);
>  
>       if (fenceA)
>               radeon_fence_unref(&fenceA);
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
> b/drivers/gpu/drm/radeon/radeon_ttm.c
> index 2d36bdd..c43035c 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -223,7 +223,6 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
>       struct radeon_device *rdev;
>       uint64_t old_start, new_start;
>       struct radeon_fence *fence;
> -     struct radeon_semaphore *sem = NULL;
>       int r, ridx;
>  
>       rdev = radeon_get_rdev(bo->bdev);
> @@ -262,31 +261,12 @@ static int radeon_move_blit(struct ttm_buffer_object 
> *bo,
>  
>       /* sync other rings */
>       fence = bo->sync_obj;
> -     if (fence && fence->ring != ridx
> -         && !radeon_fence_signaled(fence)) {
> -             bool sync_to_ring[RADEON_NUM_RINGS] = { };
> -             sync_to_ring[fence->ring] = true;
> -
> -             r = radeon_semaphore_create(rdev, &sem);
> -             if (r) {
> -                     return r;
> -             }
> -
> -             r = radeon_semaphore_sync_rings(rdev, sem, sync_to_ring, ridx);
> -             if (r) {
> -                     radeon_semaphore_free(rdev, sem, NULL);
> -                     return r;
> -             }
> -     }
> -
> -     fence = NULL;
>       r = radeon_copy(rdev, old_start, new_start,
>                       new_mem->num_pages * (PAGE_SIZE / 
> RADEON_GPU_PAGE_SIZE), /* GPU pages */
>                       &fence);
>       /* FIXME: handle copy error */
>       r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
>                                     evict, no_wait_reserve, no_wait_gpu, 
> new_mem);
> -     radeon_semaphore_free(rdev, sem, fence);
>       radeon_fence_unref(&fence);
>       return r;
>  }
> -- 
> 1.7.9.5
> 
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to