On Tue, May 26, 2015 at 3:29 PM, Grigori Goronzy <g...@chown.ath.cx> wrote:
> On 26.05.2015 09:28, Michel Dänzer wrote:
>> From: Michel Dänzer <michel.daen...@amd.com>
>>
>> Based on the corresponding SI support. Same as that, this is currently
>> only enabled for one-dimensional buffer copies due to issues with
>> multi-dimensional SDMA copies.
>>
>
> What a pity, so CIK has exactly the same issues as SI? We should really
> try to figure out what's wrong with tiled DMA copies.

It's probably worth checking again with amdgpu and addrlib. Might be some alignment we are getting wrong somewhere.

Alex

>
> Anyway,
>
> Reviewed-by: Grigori Goronzy <g...@chown.ath.cx>
>
>> Signed-off-by: Michel Dänzer <michel.daen...@amd.com>
>> ---
>> src/gallium/drivers/radeonsi/Makefile.sources | 1 +
>> src/gallium/drivers/radeonsi/cik_sdma.c | 364
>> ++++++++++++++++++++++++++
>> src/gallium/drivers/radeonsi/si_dma.c | 20 --
>> src/gallium/drivers/radeonsi/si_pipe.h | 9 +
>> src/gallium/drivers/radeonsi/si_state.c | 22 +-
>> src/gallium/drivers/radeonsi/si_state.h | 1 +
>> src/gallium/drivers/radeonsi/sid.h | 31 +++
>> 7 files changed, 427 insertions(+), 21 deletions(-)
>> create mode 100644 src/gallium/drivers/radeonsi/cik_sdma.c
>>
>> diff --git a/src/gallium/drivers/radeonsi/Makefile.sources
>> b/src/gallium/drivers/radeonsi/Makefile.sources
>> index 774dc22..2876c0a 100644
>> --- a/src/gallium/drivers/radeonsi/Makefile.sources
>> +++ b/src/gallium/drivers/radeonsi/Makefile.sources
>> @@ -1,4 +1,5 @@
>> C_SOURCES := \
>> + cik_sdma.c \
>> si_blit.c \
>> si_commands.c \
>> si_compute.c \
>> diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c
>> b/src/gallium/drivers/radeonsi/cik_sdma.c
>> new file mode 100644
>> index 0000000..3c0103a
>> --- /dev/null
>> +++ b/src/gallium/drivers/radeonsi/cik_sdma.c
>> @@ -0,0 +1,364 @@
>> +/*
>> + * Copyright 2010 Jerome Glisse <gli...@freedesktop.org>
>> + * Copyright 2014 Advanced Micro Devices, Inc.
>> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the >> "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * on the rights to use, copy, modify, merge, publish, distribute, sub >> + * license, and/or sell copies of the Software, and to permit persons to >> whom >> + * the Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the next >> + * paragraph) shall be included in all copies or substantial portions of the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >> OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, >> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR >> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE >> + * USE OR OTHER DEALINGS IN THE SOFTWARE. >> + * >> + * Authors: >> + * Jerome Glisse >> + */ >> + >> +#include "sid.h" >> +#include "si_pipe.h" >> +#include "../radeon/r600_cs.h" >> + >> +#include "util/u_format.h" >> + >> +static uint32_t cik_micro_tile_mode(struct si_screen *sscreen, unsigned >> tile_mode) >> +{ >> + if (sscreen->b.info.si_tile_mode_array_valid) { >> + uint32_t gb_tile_mode = >> sscreen->b.info.si_tile_mode_array[tile_mode]; >> + >> + return G_009910_MICRO_TILE_MODE_NEW(gb_tile_mode); >> + } >> + >> + /* The kernel cannod return the tile mode array. Guess? 
*/ >> + return V_009910_ADDR_SURF_THIN_MICRO_TILING; >> +} >> + >> +static void cik_sdma_do_copy_buffer(struct si_context *ctx, >> + struct pipe_resource *dst, >> + struct pipe_resource *src, >> + uint64_t dst_offset, >> + uint64_t src_offset, >> + uint64_t size) >> +{ >> + struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs; >> + unsigned i, ncopy, csize; >> + struct r600_resource *rdst = (struct r600_resource*)dst; >> + struct r600_resource *rsrc = (struct r600_resource*)src; >> + >> + dst_offset += r600_resource(dst)->gpu_address; >> + src_offset += r600_resource(src)->gpu_address; >> + >> + ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE; >> + r600_need_dma_space(&ctx->b, ncopy * 7); >> + >> + r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, >> RADEON_USAGE_READ, >> + RADEON_PRIO_MIN); >> + r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, >> RADEON_USAGE_WRITE, >> + RADEON_PRIO_MIN); >> + >> + for (i = 0; i < ncopy; i++) { >> + csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : >> CIK_SDMA_COPY_MAX_SIZE; >> + cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, >> + >> CIK_SDMA_COPY_SUB_OPCODE_LINEAR, >> + 0); >> + cs->buf[cs->cdw++] = csize; >> + cs->buf[cs->cdw++] = 0; /* src/dst endian swap */ >> + cs->buf[cs->cdw++] = src_offset; >> + cs->buf[cs->cdw++] = src_offset >> 32; >> + cs->buf[cs->cdw++] = dst_offset; >> + cs->buf[cs->cdw++] = dst_offset >> 32; >> + dst_offset += csize; >> + src_offset += csize; >> + size -= csize; >> + } >> +} >> + >> +static void cik_sdma_copy_buffer(struct si_context *ctx, >> + struct pipe_resource *dst, >> + struct pipe_resource *src, >> + uint64_t dst_offset, >> + uint64_t src_offset, >> + uint64_t size) >> +{ >> + struct r600_resource *rdst = (struct r600_resource*)dst; >> + >> + /* Mark the buffer range of destination as valid (initialized), >> + * so that transfer_map knows it should wait for the GPU when mapping >> + * that range. 
*/ >> + util_range_add(&rdst->valid_buffer_range, dst_offset, >> + dst_offset + size); >> + >> + cik_sdma_do_copy_buffer(ctx, dst, src, dst_offset, src_offset, size); >> +} >> + >> +static void cik_sdma_copy_tile(struct si_context *ctx, >> + struct pipe_resource *dst, >> + unsigned dst_level, >> + struct pipe_resource *src, >> + unsigned src_level, >> + unsigned y, >> + unsigned copy_height, >> + unsigned y_align, >> + unsigned pitch, >> + unsigned bpe) >> +{ >> + struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs; >> + struct si_screen *sscreen = ctx->screen; >> + struct r600_texture *rsrc = (struct r600_texture*)src; >> + struct r600_texture *rdst = (struct r600_texture*)dst; >> + struct r600_texture *rlinear, *rtiled; >> + unsigned linear_lvl, tiled_lvl; >> + unsigned array_mode, lbpe, pitch_tile_max, slice_tile_max, size; >> + unsigned ncopy, height, cheight, detile, i, src_mode, dst_mode; >> + unsigned sub_op, bank_h, bank_w, mt_aspect, nbanks, tile_split, mt; >> + uint64_t base, addr; >> + unsigned pipe_config, tile_mode_index; >> + >> + dst_mode = rdst->surface.level[dst_level].mode; >> + src_mode = rsrc->surface.level[src_level].mode; >> + /* downcast linear aligned to linear to simplify test */ >> + src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? >> RADEON_SURF_MODE_LINEAR : src_mode; >> + dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? >> RADEON_SURF_MODE_LINEAR : dst_mode; >> + assert(dst_mode != src_mode); >> + assert(src_mode == RADEON_SURF_MODE_LINEAR || dst_mode == >> RADEON_SURF_MODE_LINEAR); >> + >> + sub_op = CIK_SDMA_COPY_SUB_OPCODE_TILED; >> + lbpe = util_logbase2(bpe); >> + pitch_tile_max = ((pitch / bpe) / 8) - 1; >> + >> + detile = dst_mode == RADEON_SURF_MODE_LINEAR; >> + rlinear = detile ? rdst : rsrc; >> + rtiled = detile ? rsrc : rdst; >> + linear_lvl = detile ? dst_level : src_level; >> + tiled_lvl = detile ? 
src_level : dst_level; >> + >> + assert(!util_format_is_depth_and_stencil(rtiled->resource.b.b.format)); >> + >> + array_mode = si_array_mode(rtiled->surface.level[tiled_lvl].mode); >> + slice_tile_max = (rtiled->surface.level[tiled_lvl].nblk_x * >> + rtiled->surface.level[tiled_lvl].nblk_y) / (8*8) - 1; >> + height = rlinear->surface.level[linear_lvl].nblk_y; >> + base = rtiled->surface.level[tiled_lvl].offset; >> + addr = rlinear->surface.level[linear_lvl].offset; >> + bank_h = cik_bank_wh(rtiled->surface.bankh); >> + bank_w = cik_bank_wh(rtiled->surface.bankw); >> + mt_aspect = cik_macro_tile_aspect(rtiled->surface.mtilea); >> + tile_split = cik_tile_split(rtiled->surface.tile_split); >> + tile_mode_index = si_tile_mode_index(rtiled, tiled_lvl, false); >> + nbanks = si_num_banks(sscreen, rtiled); >> + base += rtiled->resource.gpu_address; >> + addr += rlinear->resource.gpu_address; >> + >> + pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); >> + mt = cik_micro_tile_mode(sscreen, tile_mode_index); >> + >> + size = (copy_height * pitch) / 4; >> + cheight = copy_height; >> + if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) { >> + cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch; >> + cheight &= ~(y_align - 1); >> + } >> + ncopy = (copy_height + cheight - 1) / cheight; >> + r600_need_dma_space(&ctx->b, ncopy * 12); >> + >> + r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource, >> + RADEON_USAGE_READ, RADEON_PRIO_MIN); >> + r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource, >> + RADEON_USAGE_WRITE, RADEON_PRIO_MIN); >> + >> + copy_height = size * 4 / pitch; >> + for (i = 0; i < ncopy; i++) { >> + cheight = copy_height; >> + if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) { >> + cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch; >> + cheight &= ~(y_align - 1); >> + } >> + size = (cheight * pitch) / 4; >> + >> + cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, >> + sub_op, detile << 15); >> + cs->buf[cs->cdw++] = 
base; >> + cs->buf[cs->cdw++] = base >> 32; >> + cs->buf[cs->cdw++] = ((height - 1) << 16) | pitch_tile_max; >> + cs->buf[cs->cdw++] = slice_tile_max; >> + cs->buf[cs->cdw++] = (pipe_config << 26) | (mt_aspect << 24) | >> + (nbanks << 21) | (bank_h << 18) | (bank_w << 15) | >> + (tile_split << 11) | (mt << 8) | (array_mode << 3) | >> + lbpe; >> + cs->buf[cs->cdw++] = y << 16; /* | x */ >> + cs->buf[cs->cdw++] = 0; /* z */; >> + cs->buf[cs->cdw++] = addr & 0xfffffffc; >> + cs->buf[cs->cdw++] = addr >> 32; >> + cs->buf[cs->cdw++] = (pitch / bpe) - 1; >> + cs->buf[cs->cdw++] = size; >> + >> + copy_height -= cheight; >> + y += cheight; >> + } >> +} >> + >> +void cik_sdma_copy(struct pipe_context *ctx, >> + struct pipe_resource *dst, >> + unsigned dst_level, >> + unsigned dstx, unsigned dsty, unsigned dstz, >> + struct pipe_resource *src, >> + unsigned src_level, >> + const struct pipe_box *src_box) >> +{ >> + struct si_context *sctx = (struct si_context *)ctx; >> + struct r600_texture *rsrc = (struct r600_texture*)src; >> + struct r600_texture *rdst = (struct r600_texture*)dst; >> + unsigned dst_pitch, src_pitch, bpe, dst_mode, src_mode; >> + unsigned src_w, dst_w; >> + unsigned src_x, src_y; >> + unsigned copy_height, y_align; >> + unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; >> + >> + if (sctx->b.rings.dma.cs == NULL) { >> + goto fallback; >> + } >> + >> + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { >> + cik_sdma_copy_buffer(sctx, dst, src, dst_x, src_box->x, >> src_box->width); >> + return; >> + } >> + >> + /* Before re-enabling this, please make sure you can hit all newly >> + * enabled paths in your testing, preferably with both piglit (in >> + * particular the streaming-texture-leak test) and real world apps >> + * (e.g. the UE4 Elemental demo). 
>> + */ >> + goto fallback; >> + >> + if (src->format != dst->format || >> + rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 || >> + rdst->dirty_level_mask & (1 << dst_level)) { >> + goto fallback; >> + } >> + >> + if (rsrc->dirty_level_mask & (1 << src_level)) { >> + if (rsrc->htile_buffer) >> + goto fallback; >> + >> + ctx->flush_resource(ctx, src); >> + } >> + >> + src_x = util_format_get_nblocksx(src->format, src_box->x); >> + dst_x = util_format_get_nblocksx(src->format, dst_x); >> + src_y = util_format_get_nblocksy(src->format, src_box->y); >> + dst_y = util_format_get_nblocksy(src->format, dst_y); >> + >> + dst_pitch = rdst->surface.level[dst_level].pitch_bytes; >> + src_pitch = rsrc->surface.level[src_level].pitch_bytes; >> + src_w = rsrc->surface.level[src_level].npix_x; >> + dst_w = rdst->surface.level[dst_level].npix_x; >> + >> + if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w || >> + src_box->width != src_w || >> + rsrc->surface.level[src_level].nblk_y != >> + rdst->surface.level[dst_level].nblk_y) { >> + /* FIXME CIK can do partial blit */ >> + goto fallback; >> + } >> + >> + bpe = rdst->surface.bpe; >> + copy_height = src_box->height / rsrc->surface.blk_h; >> + dst_mode = rdst->surface.level[dst_level].mode; >> + src_mode = rsrc->surface.level[src_level].mode; >> + /* downcast linear aligned to linear to simplify test */ >> + src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? >> RADEON_SURF_MODE_LINEAR : src_mode; >> + dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? >> RADEON_SURF_MODE_LINEAR : dst_mode; >> + >> + /* Dimensions must be aligned to (macro)tiles */ >> + switch (src_mode == RADEON_SURF_MODE_LINEAR ? 
dst_mode : src_mode) { >> + case RADEON_SURF_MODE_1D: >> + if ((src_x % 8) || (src_y % 8) || (dst_x % 8) || (dst_y % 8) || >> + (copy_height % 8)) >> + goto fallback; >> + y_align = 8; >> + break; >> + case RADEON_SURF_MODE_2D: { >> + unsigned mtilew, mtileh, num_banks; >> + >> + switch (si_num_banks(sctx->screen, rsrc)) { >> + case V_02803C_ADDR_SURF_2_BANK: >> + default: >> + num_banks = 2; >> + break; >> + case V_02803C_ADDR_SURF_4_BANK: >> + num_banks = 4; >> + break; >> + case V_02803C_ADDR_SURF_8_BANK: >> + num_banks = 8; >> + break; >> + case V_02803C_ADDR_SURF_16_BANK: >> + num_banks = 16; >> + break; >> + } >> + >> + mtilew = (8 * rsrc->surface.bankw * >> + sctx->screen->b.tiling_info.num_channels) * >> + rsrc->surface.mtilea; >> + assert(!(mtilew & (mtilew - 1))); >> + mtileh = (8 * rsrc->surface.bankh * num_banks) / >> + rsrc->surface.mtilea; >> + assert(!(mtileh & (mtileh - 1))); >> + >> + if ((src_x & (mtilew - 1)) || (src_y & (mtileh - 1)) || >> + (dst_x & (mtilew - 1)) || (dst_y & (mtileh - 1)) || >> + (copy_height & (mtileh - 1))) >> + goto fallback; >> + >> + y_align = mtileh; >> + break; >> + } >> + default: >> + y_align = 1; >> + } >> + >> + if (src_mode == dst_mode) { >> + uint64_t dst_offset, src_offset; >> + unsigned src_h, dst_h; >> + >> + src_h = rsrc->surface.level[src_level].npix_y; >> + dst_h = rdst->surface.level[dst_level].npix_y; >> + >> + if (src_box->depth > 1 && >> + (src_y || dst_y || src_h != dst_h || src_box->height != >> src_h)) >> + goto fallback; >> + >> + /* simple dma blit would do NOTE code here assume : >> + * dst_pitch == src_pitch >> + */ >> + src_offset= rsrc->surface.level[src_level].offset; >> + src_offset += rsrc->surface.level[src_level].slice_size * >> src_box->z; >> + src_offset += src_y * src_pitch + src_x * bpe; >> + dst_offset = rdst->surface.level[dst_level].offset; >> + dst_offset += rdst->surface.level[dst_level].slice_size * >> dst_z; >> + dst_offset += dst_y * dst_pitch + dst_x * bpe; >> + 
cik_sdma_do_copy_buffer(sctx, dst, src, dst_offset, src_offset, >> + src_box->depth * >> + >> rsrc->surface.level[src_level].slice_size); >> + } else { >> + if (dst_y != src_y || src_box->depth > 1 || src_box->z || >> dst_z) >> + goto fallback; >> + >> + cik_sdma_copy_tile(sctx, dst, dst_level, src, src_level, >> + src_y, copy_height, y_align, dst_pitch, >> bpe); >> + } >> + return; >> + >> +fallback: >> + si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, >> + src, src_level, src_box); >> +} >> diff --git a/src/gallium/drivers/radeonsi/si_dma.c >> b/src/gallium/drivers/radeonsi/si_dma.c >> index db523ee..7a0076e 100644 >> --- a/src/gallium/drivers/radeonsi/si_dma.c >> +++ b/src/gallium/drivers/radeonsi/si_dma.c >> @@ -30,21 +30,6 @@ >> >> #include "util/u_format.h" >> >> -static unsigned si_array_mode(unsigned mode) >> -{ >> - switch (mode) { >> - case RADEON_SURF_MODE_LINEAR_ALIGNED: >> - return V_009910_ARRAY_LINEAR_ALIGNED; >> - case RADEON_SURF_MODE_1D: >> - return V_009910_ARRAY_1D_TILED_THIN1; >> - case RADEON_SURF_MODE_2D: >> - return V_009910_ARRAY_2D_TILED_THIN1; >> - default: >> - case RADEON_SURF_MODE_LINEAR: >> - return V_009910_ARRAY_LINEAR_GENERAL; >> - } >> -} >> - >> static uint32_t si_micro_tile_mode(struct si_screen *sscreen, unsigned >> tile_mode) >> { >> if (sscreen->b.info.si_tile_mode_array_valid) { >> @@ -240,11 +225,6 @@ void si_dma_copy(struct pipe_context *ctx, >> goto fallback; >> } >> >> - /* TODO: Implement DMA copy for CIK */ >> - if (sctx->b.chip_class >= CIK) { >> - goto fallback; >> - } >> - >> if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { >> si_dma_copy_buffer(sctx, dst, src, dst_x, src_box->x, >> src_box->width); >> return; >> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h >> b/src/gallium/drivers/radeonsi/si_pipe.h >> index f98c7a8..2d67342 100644 >> --- a/src/gallium/drivers/radeonsi/si_pipe.h >> +++ b/src/gallium/drivers/radeonsi/si_pipe.h >> @@ -237,6 +237,15 @@ struct si_context { >> 
unsigned spi_tmpring_size; >> }; >> >> +/* cik_sdma.c */ >> +void cik_sdma_copy(struct pipe_context *ctx, >> + struct pipe_resource *dst, >> + unsigned dst_level, >> + unsigned dstx, unsigned dsty, unsigned dstz, >> + struct pipe_resource *src, >> + unsigned src_level, >> + const struct pipe_box *src_box); >> + >> /* si_blit.c */ >> void si_init_blit_functions(struct si_context *sctx); >> void si_flush_depth_textures(struct si_context *sctx, >> diff --git a/src/gallium/drivers/radeonsi/si_state.c >> b/src/gallium/drivers/radeonsi/si_state.c >> index 7f0fdd5..f003a04 100644 >> --- a/src/gallium/drivers/radeonsi/si_state.c >> +++ b/src/gallium/drivers/radeonsi/si_state.c >> @@ -44,6 +44,21 @@ static void si_init_atom(struct r600_atom *atom, struct >> r600_atom **list_elem, >> *list_elem = atom; >> } >> >> +unsigned si_array_mode(unsigned mode) >> +{ >> + switch (mode) { >> + case RADEON_SURF_MODE_LINEAR_ALIGNED: >> + return V_009910_ARRAY_LINEAR_ALIGNED; >> + case RADEON_SURF_MODE_1D: >> + return V_009910_ARRAY_1D_TILED_THIN1; >> + case RADEON_SURF_MODE_2D: >> + return V_009910_ARRAY_2D_TILED_THIN1; >> + default: >> + case RADEON_SURF_MODE_LINEAR: >> + return V_009910_ARRAY_LINEAR_GENERAL; >> + } >> +} >> + >> uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) >> { >> if (sscreen->b.chip_class == CIK && >> @@ -2910,11 +2925,16 @@ void si_init_state_functions(struct si_context *sctx) >> sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; >> sctx->b.b.set_min_samples = si_set_min_samples; >> >> - sctx->b.dma_copy = si_dma_copy; >> sctx->b.set_occlusion_query_state = si_set_occlusion_query_state; >> sctx->b.need_gfx_cs_space = si_need_gfx_cs_space; >> >> sctx->b.b.draw_vbo = si_draw_vbo; >> + >> + if (sctx->b.chip_class >= CIK) { >> + sctx->b.dma_copy = cik_sdma_copy; >> + } else { >> + sctx->b.dma_copy = si_dma_copy; >> + } >> } >> >> static void >> diff --git a/src/gallium/drivers/radeonsi/si_state.h >> 
b/src/gallium/drivers/radeonsi/si_state.h >> index 2f8a943..5e68b16 100644 >> --- a/src/gallium/drivers/radeonsi/si_state.h >> +++ b/src/gallium/drivers/radeonsi/si_state.h >> @@ -261,6 +261,7 @@ unsigned cik_bank_wh(unsigned bankwh); >> unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode); >> unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect); >> unsigned cik_tile_split(unsigned tile_split); >> +unsigned si_array_mode(unsigned mode); >> uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex); >> unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool >> stencil); >> >> diff --git a/src/gallium/drivers/radeonsi/sid.h >> b/src/gallium/drivers/radeonsi/sid.h >> index afe011b..35d5ee2 100644 >> --- a/src/gallium/drivers/radeonsi/sid.h >> +++ b/src/gallium/drivers/radeonsi/sid.h >> @@ -4516,6 +4516,13 @@ >> #define V_009910_ADDR_SURF_8_BANK 0x02 >> #define V_009910_ADDR_SURF_16_BANK 0x03 >> /* CIK */ >> +#define S_009910_MICRO_TILE_MODE_NEW(x) (((x) >> & 0x07) << 22) >> +#define G_009910_MICRO_TILE_MODE_NEW(x) (((x) >> >> 22) & 0x07) >> +#define C_009910_MICRO_TILE_MODE_NEW(x) >> 0xFE3FFFFF >> +#define V_009910_ADDR_SURF_DISPLAY_MICRO_TILING 0x00 >> +#define V_009910_ADDR_SURF_THIN_MICRO_TILING 0x01 >> +#define V_009910_ADDR_SURF_DEPTH_MICRO_TILING 0x02 >> +#define V_009910_ADDR_SURF_ROTATED_MICRO_TILING 0x03 >> #define R_00B01C_SPI_SHADER_PGM_RSRC3_PS >> 0x00B01C >> #define S_00B01C_CU_EN(x) (((x) >> & 0xFFFF) << 0) >> #define G_00B01C_CU_EN(x) (((x) >> >> 0) & 0xFFFF) >> @@ -8696,5 +8703,29 @@ >> #define SI_DMA_PACKET_CONSTANT_FILL 0xd >> #define SI_DMA_PACKET_NOP 0xf >> >> +/* CIK async DMA packets */ >> +#define CIK_SDMA_PACKET(op, sub_op, n) ((((n) & 0xFFFF) << 16) | \ >> + (((sub_op) & 0xFF) << 8) | \ >> + (((op) & 0xFF) << 0)) >> +/* CIK async DMA packet types */ >> +#define CIK_SDMA_OPCODE_NOP 0x0 >> +#define CIK_SDMA_OPCODE_COPY 0x1 >> +#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR 0x0 >> +#define 
CIK_SDMA_COPY_SUB_OPCODE_TILED 0x1 >> +#define CIK_SDMA_COPY_SUB_OPCODE_SOA 0x3 >> +#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4 >> +#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 0x5 >> +#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6 >> +#define CIK_SDMA_OPCODE_WRITE 0x2 >> +#define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0 >> +#define SDMA_WRTIE_SUB_OPCODE_TILED 0x1 >> +#define CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4 >> +#define CIK_SDMA_PACKET_FENCE 0x5 >> +#define CIK_SDMA_PACKET_TRAP 0x6 >> +#define CIK_SDMA_PACKET_SEMAPHORE 0x7 >> +#define CIK_SDMA_PACKET_CONSTANT_FILL 0xb >> +#define CIK_SDMA_PACKET_SRBM_WRITE 0xe >> +#define CIK_SDMA_COPY_MAX_SIZE 0x1fffff >> + >> #endif /* _SID_H */ >> >> > > > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev