On Sunday 09 March 2014, 02:24:51, Marek Olšák wrote:
> From: Marek Olšák <marek.ol...@amd.com>
> 
> ---
>  src/gallium/drivers/r600/evergreen_state.c      | 37 +++++++++-------
>  src/gallium/drivers/r600/r600_state.c           | 41 ++++++++++-------
>  src/gallium/drivers/radeon/r600_buffer_common.c | 58 +++++++++++--------------
>  src/gallium/drivers/radeon/r600_pipe_common.h   | 17 ++++----
>  src/gallium/drivers/radeon/r600_texture.c       | 18 +++-----
>  src/gallium/drivers/radeonsi/si_state.c         | 19 ++++----
>  6 files changed, 97 insertions(+), 93 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c
> b/src/gallium/drivers/r600/evergreen_state.c index dca7c58..5e57f8d 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -3329,13 +3329,13 @@ static void evergreen_dma_copy_tile(struct
> r600_context *rctx, }
>  }
> 
> -static boolean evergreen_dma_blit(struct pipe_context *ctx,
> -                               struct pipe_resource *dst,
> -                               unsigned dst_level,
> -                               unsigned dst_x, unsigned dst_y, unsigned 
> dst_z,
> -                               struct pipe_resource *src,
> -                               unsigned src_level,
> -                               const struct pipe_box *src_box)
> +static void evergreen_dma_blit(struct pipe_context *ctx,
> +                            struct pipe_resource *dst,
> +                            unsigned dst_level,
> +                            unsigned dst_x, unsigned dst_y, unsigned dst_z,
> +                            struct pipe_resource *src,
> +                            unsigned src_level,
> +                            const struct pipe_box *src_box)
>  {
>       struct r600_context *rctx = (struct r600_context *)ctx;
>       struct r600_texture *rsrc = (struct r600_texture*)src;
> @@ -3345,19 +3345,22 @@ static boolean evergreen_dma_blit(struct
> pipe_context *ctx, unsigned src_x, src_y;
> 
>       if (rctx->b.rings.dma.cs == NULL) {
> -             return FALSE;
> +             goto fallback;
>       }
> 
>       if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
> +             if (dst_x % 4 || src_box->x % 4 || src_box->width % 4)
> +                     goto fallback;

Why do we need this alignment check? I think the async DMA engine can handle
byte-aligned copies. Isn't it streamout that needs x and width to be
dword-aligned?

> +
>               evergreen_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box->width);
> -             return TRUE;
> +             return;
>       }
> 
>       if (src->format != dst->format) {
> -             return FALSE;
> +             goto fallback;
>       }
>       if (rdst->dirty_level_mask != 0) {
> -             return FALSE;
> +             goto fallback;
>       }
>       if (rsrc->dirty_level_mask) {
>               ctx->flush_resource(ctx, src);
> @@ -3383,13 +3386,13 @@ static boolean evergreen_dma_blit(struct
> pipe_context *ctx,
> 
>       if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
>               /* FIXME evergreen can do partial blit */
> -             return FALSE;
> +             goto fallback;
>       }
>       /* the x test here are currently useless (because we don't support 
partial
> blit) * but keep them around so we don't forget about those
>        */
>       if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) ||
> (src_box->y & 0x7) || (dst_y & 0x7)) { -              return FALSE;
> +             goto fallback;
>       }
> 
>       /* 128 bpp surfaces require non_disp_tiling for both
> @@ -3400,7 +3403,7 @@ static boolean evergreen_dma_blit(struct pipe_context
> *ctx, if ((rctx->b.chip_class == CAYMAN) &&
>           (src_mode != dst_mode) &&
>           (util_format_get_blocksize(src->format) >= 16)) {
> -             return FALSE;
> +             goto fallback;
>       }
> 
>       if (src_mode == dst_mode) {
> @@ -3423,7 +3426,11 @@ static boolean evergreen_dma_blit(struct pipe_context
> *ctx, src, src_level, src_x, src_y, src_box->z,
>                                       copy_height, dst_pitch, bpp);
>       }
> -     return TRUE;
> +     return;
> +
> +fallback:
> +     ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
> +                               src, src_level, src_box);
>  }
> 
>  void evergreen_init_state_functions(struct r600_context *rctx)
> diff --git a/src/gallium/drivers/r600/r600_state.c
> b/src/gallium/drivers/r600/r600_state.c index 6d89e6c..a0e6d2d 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -2883,13 +2883,13 @@ static boolean r600_dma_copy_tile(struct
> r600_context *rctx, return TRUE;
>  }
> 
> -static boolean r600_dma_blit(struct pipe_context *ctx,
> -                          struct pipe_resource *dst,
> -                          unsigned dst_level,
> -                          unsigned dst_x, unsigned dst_y, unsigned dst_z,
> -                          struct pipe_resource *src,
> -                          unsigned src_level,
> -                          const struct pipe_box *src_box)
> +static void r600_dma_blit(struct pipe_context *ctx,
> +                       struct pipe_resource *dst,
> +                       unsigned dst_level,
> +                       unsigned dst_x, unsigned dst_y, unsigned dst_z,
> +                       struct pipe_resource *src,
> +                       unsigned src_level,
> +                       const struct pipe_box *src_box)
>  {
>       struct r600_context *rctx = (struct r600_context *)ctx;
>       struct r600_texture *rsrc = (struct r600_texture*)src;
> @@ -2899,16 +2899,19 @@ static boolean r600_dma_blit(struct pipe_context
> *ctx, unsigned src_x, src_y;
> 
>       if (rctx->b.rings.dma.cs == NULL) {
> -             return FALSE;
> +             goto fallback;
>       }
> 
>       if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
> +             if (dst_x % 4 || src_box->x % 4 || src_box->width % 4)
> +                     goto fallback;
> +
>               r600_dma_copy(rctx, dst, src, dst_x, src_box->x, 
> src_box->width);
> -             return TRUE;
> +             return;
>       }
> 
>       if (src->format != dst->format) {
> -             return FALSE;
> +             goto fallback;
>       }
> 
>       src_x = util_format_get_nblocksx(src->format, src_box->x);
> @@ -2931,11 +2934,11 @@ static boolean r600_dma_blit(struct pipe_context
> *ctx,
> 
>       if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
>               /* strick requirement on r6xx/r7xx */
> -             return FALSE;
> +             goto fallback;
>       }
>       /* lot of constraint on alignment this should capture them all */
>       if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
> -             return FALSE;
> +             goto fallback;
>       }
> 
>       if (src_mode == dst_mode) {
> @@ -2955,15 +2958,21 @@ static boolean r600_dma_blit(struct pipe_context
> *ctx, size = src_box->height * src_pitch;
>               /* must be dw aligned */
>               if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) {
> -                     return FALSE;
> +                     goto fallback;
>               }
>               r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size);
>       } else {
> -             return r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, 
> dst_z,
> +             if (!r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, 
> dst_z,
>                                       src, src_level, src_x, src_y, 
> src_box->z,
> -                                     copy_height, dst_pitch, bpp);
> +                                     copy_height, dst_pitch, bpp)) {
> +                     goto fallback;
> +             }
>       }
> -     return TRUE;
> +     return;
> +
> +fallback:
> +     ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
> +                               src, src_level, src_box);
>  }
> 
>  void r600_init_state_functions(struct r600_context *rctx)
> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c
> b/src/gallium/drivers/radeon/r600_buffer_common.c index 90ca8cb..a7ecfb3
> 100644
> --- a/src/gallium/drivers/radeon/r600_buffer_common.c
> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
> @@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct
> pipe_context *ctx, return data;
>  }
> 
> +static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
> +                                  unsigned dstx, unsigned srcx, unsigned 
> size)
> +{
> +     bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
> +
> +     return rctx->screen->has_cp_dma ||
> +            (dword_aligned && (rctx->rings.dma.cs ||
> +                               rctx->screen->has_streamout));
> +
> +}
> +
>  static void *r600_buffer_transfer_map(struct pipe_context *ctx,
>                                        struct pipe_resource *resource,
>                                        unsigned level,
> @@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct
> pipe_context *ctx, else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
>                !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
>                !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
> -              (rscreen->has_cp_dma ||
> -               (rscreen->has_streamout &&
> -                /* The buffer range must be aligned to 4 with streamout. */
> -                box->x % 4 == 0 && box->width % 4 == 0))) {
> +              r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
>               assert(usage & PIPE_TRANSFER_WRITE);
> 
>               /* Check if mapping this buffer would cause waiting for the 
> GPU. */
> @@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct
> pipe_context *ctx, /* At this point, the buffer is always idle (we checked
> it above). */ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
>       }
> -     /* Using DMA for larger reads is much faster */
> +     /* Using a staging buffer in GTT for larger reads is much faster. */
>       else if ((usage & PIPE_TRANSFER_READ) &&
>                !(usage & PIPE_TRANSFER_WRITE) &&
> -              (rbuffer->domains == RADEON_DOMAIN_VRAM)) {
> +              rbuffer->domains == RADEON_DOMAIN_VRAM &&
> +              r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
>               unsigned offset;
>               struct r600_resource *staging = NULL;
> 
> @@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct
> pipe_context *ctx, if (staging) {
>                       data += box->x % R600_MAP_BUFFER_ALIGNMENT;
> 
> -                     /* Copy the staging buffer into the original one. */
> -                     if (rctx->dma_copy(ctx, (struct pipe_resource*)staging, 
> 0,
> -                                              box->x % 
> R600_MAP_BUFFER_ALIGNMENT,
> -                                              0, 0, resource, level, box)) {
> -                             rctx->rings.gfx.flush(rctx, 0);
> -                             if (rctx->rings.dma.cs)
> -                                     rctx->rings.dma.flush(rctx, 0);
> -
> -                             /* Wait for any offloaded CS flush to complete
> -                              * to avoid busy-waiting in the winsys. */
> -                             rctx->ws->cs_sync_flush(rctx->rings.gfx.cs);
> -                             if (rctx->rings.dma.cs)
> -                                     
> rctx->ws->cs_sync_flush(rctx->rings.dma.cs);
> -
> -                             rctx->ws->buffer_wait(staging->buf, 
> RADEON_USAGE_WRITE);
> -                             return r600_buffer_get_transfer(ctx, resource, 
> level, 
usage, box,
> -                                                             ptransfer, 
> data, staging, offset);
> -                     } else {
> -                             pipe_resource_reference((struct 
> pipe_resource**)&staging, 
NULL);
> -                     }
> +                     /* Copy the VRAM buffer to the staging buffer. */
> +                     rctx->dma_copy(ctx, &staging->b.b, 0,
> +                                    box->x % R600_MAP_BUFFER_ALIGNMENT,
> +                                    0, 0, resource, level, box);
> +
> +                     /* Just do the synchronization. The buffer is mapped 
> already. */
> +                     r600_buffer_map_sync_with_rings(rctx, staging, 
PIPE_TRANSFER_READ);
> +
> +                     return r600_buffer_get_transfer(ctx, resource, level, 
> usage, box,
> +                                                     ptransfer, data, 
> staging, offset);
>               }
>       }
> 
> @@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct
> pipe_context *ctx, u_box_1d(soffset, size, &box);
> 
>                       /* Copy the staging buffer into the original one. */
> -                     if (!(size % 4) && !(doffset % 4) && !(soffset % 4) &&
> -                         rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, 
> &box)) {
> -                             /* DONE. */
> -                     } else {
> -                             ctx->resource_copy_region(ctx, dst, 0, doffset, 
> 0, 0, src, 
0, &box);
> -                     }
> +                     rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, 
> &box);
>               }
>               pipe_resource_reference((struct 
> pipe_resource**)&rtransfer->staging,
> NULL); }
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
> b/src/gallium/drivers/radeon/r600_pipe_common.h index 493a8fc..5e544c5
> 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -344,15 +344,14 @@ struct r600_common_context {
>       boolean                         current_render_cond_cond;
>       boolean                         predicate_drawing;
> 
> -     /* Copy one resource to another using async DMA.
> -      * False is returned if the copy couldn't be done. */
> -     boolean (*dma_copy)(struct pipe_context *ctx,
> -                         struct pipe_resource *dst,
> -                         unsigned dst_level,
> -                         unsigned dst_x, unsigned dst_y, unsigned dst_z,
> -                         struct pipe_resource *src,
> -                         unsigned src_level,
> -                         const struct pipe_box *src_box);
> +     /* Copy one resource to another using async DMA. */
> +     void (*dma_copy)(struct pipe_context *ctx,
> +                      struct pipe_resource *dst,
> +                      unsigned dst_level,
> +                      unsigned dst_x, unsigned dst_y, unsigned dst_z,
> +                      struct pipe_resource *src,
> +                      unsigned src_level,
> +                      const struct pipe_box *src_box);
> 
>       void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource 
> *dst,
>                            unsigned offset, unsigned size, unsigned value);
> diff --git a/src/gallium/drivers/radeon/r600_texture.c
> b/src/gallium/drivers/radeon/r600_texture.c index e3b918e..5c32a84 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -80,12 +80,8 @@ static void r600_copy_to_staging_texture(struct
> pipe_context *ctx, struct r600_t return;
>       }
> 
> -     if (!rctx->dma_copy(ctx, dst, 0, 0, 0, 0,
> -                           src, transfer->level,
> -                           &transfer->box)) {
> -             ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
> -                                       src, transfer->level, &transfer->box);
> -     }
> +     rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
> +                    &transfer->box);
>  }
> 
>  /* Copy from a transfer's staging texture to a full GPU one. */
> @@ -106,13 +102,9 @@ static void r600_copy_from_staging_texture(struct
> pipe_context *ctx, struct r600 return;
>       }
> 
> -     if (!rctx->dma_copy(ctx, dst, transfer->level,
> -                           transfer->box.x, transfer->box.y, transfer->box.z,
> -                           src, 0, &sbox)) {
> -             ctx->resource_copy_region(ctx, dst, transfer->level,
> -                                       transfer->box.x, transfer->box.y, 
> transfer->box.z,
> -                                       src, 0, &sbox);
> -     }
> +     rctx->dma_copy(ctx, dst, transfer->level,
> +                    transfer->box.x, transfer->box.y, transfer->box.z,
> +                    src, 0, &sbox);
>  }
> 
>  static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned
> level, diff --git a/src/gallium/drivers/radeonsi/si_state.c
> b/src/gallium/drivers/radeonsi/si_state.c index 4094421..9210547 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -2919,16 +2919,19 @@ static void *si_create_blend_custom(struct
> si_context *sctx, unsigned mode) return
> si_create_blend_state_mode(&sctx->b.b, &blend, mode);
>  }
> 
> -static boolean si_dma_copy(struct pipe_context *ctx,
> -                        struct pipe_resource *dst,
> -                        unsigned dst_level,
> -                        unsigned dst_x, unsigned dst_y, unsigned dst_z,
> -                        struct pipe_resource *src,
> -                        unsigned src_level,
> -                        const struct pipe_box *src_box)
> +static void si_dma_copy(struct pipe_context *ctx,
> +                     struct pipe_resource *dst,
> +                     unsigned dst_level,
> +                     unsigned dst_x, unsigned dst_y, unsigned dst_z,
> +                     struct pipe_resource *src,
> +                     unsigned src_level,
> +                     const struct pipe_box *src_box)
>  {
>       /* XXX implement this or share evergreen_dma_blit with r600g */
> -     return FALSE;
> +
> +     /* Fallback: */
> +     ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
> +                               src, src_level, src_box);
>  }
> 
>  static void si_set_occlusion_query_state(struct pipe_context *ctx, bool
> enable) --
> 1.8.3.2
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to