Module: Mesa Branch: main Commit: 564b97219641941ab414b863e78a13e9cfc1bbe6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=564b97219641941ab414b863e78a13e9cfc1bbe6
Author: Vitaliy Triang3l Kuzmin <trian...@yandex.ru> Date: Sat Oct 7 19:10:46 2023 +0300 r600: Move r600_create_vertex_fetch_shader to r600_shader.c For r600_asm to be usable outside Gallium Signed-off-by: Vitaliy Triang3l Kuzmin <trian...@yandex.ru> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25695> --- src/gallium/drivers/r600/r600_asm.c | 160 --------------------------------- src/gallium/drivers/r600/r600_asm.h | 4 - src/gallium/drivers/r600/r600_shader.c | 160 +++++++++++++++++++++++++++++++++ src/gallium/drivers/r600/r600_shader.h | 4 + 4 files changed, 164 insertions(+), 164 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 85fcb31d194..363b297f53e 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -28,8 +28,6 @@ #include <errno.h> #include "util/u_bitcast.h" -#include "util/u_dump.h" -#include "util/u_endian.h" #include "util/u_memory.h" #include "util/u_math.h" #include "pipe/p_shader_tokens.h" @@ -2807,164 +2805,6 @@ out_unknown: R600_ERR("unsupported vertex format %s\n", util_format_name(pformat)); } -void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_bytecode bc; - struct r600_bytecode_vtx vtx; - const struct util_format_description *desc; - unsigned fetch_resource_start = rctx->b.gfx_level >= EVERGREEN ? 0 : 160; - unsigned format, num_format, format_comp, endian; - uint32_t *bytecode; - int i, j, r, fs_size; - uint32_t buffer_mask = 0; - struct r600_fetch_shader *shader; - unsigned strides[PIPE_MAX_ATTRIBS]; - - assert(count < 32); - - memset(&bc, 0, sizeof(bc)); - r600_bytecode_init(&bc, rctx->b.gfx_level, rctx->b.family, - rctx->screen->has_compressed_msaa_texturing); - - bc.isa = rctx->isa; - - for (i = 0; i < count; i++) { - if (elements[i].instance_divisor > 1) { - if (rctx->b.gfx_level == CAYMAN) { - for (j = 0; j < 4; j++) { - struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(alu)); - alu.op = ALU_OP2_MULHI_UINT; - alu.src[0].sel = 0; - alu.src[0].chan = 3; - alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; - alu.dst.sel = i + 1; - alu.dst.chan = j; - alu.dst.write = j == 3; - alu.last = j == 3; - if ((r = r600_bytecode_add_alu(&bc, &alu))) { - r600_bytecode_clear(&bc); - return NULL; - } - } - } else { - struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(alu)); - alu.op = ALU_OP2_MULHI_UINT; - alu.src[0].sel = 0; - alu.src[0].chan = 3; - alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; - alu.dst.sel = i + 1; - alu.dst.chan = 3; - alu.dst.write = 1; - alu.last = 1; - if ((r = r600_bytecode_add_alu(&bc, &alu))) { - r600_bytecode_clear(&bc); - return NULL; - } - } - } - strides[elements[i].vertex_buffer_index] = elements[i].src_stride; - buffer_mask |= BITFIELD_BIT(elements[i].vertex_buffer_index); - } - - for (i = 0; i < count; i++) { - r600_vertex_data_type(elements[i].src_format, - &format, &num_format, &format_comp, &endian); - - desc = util_format_description(elements[i].src_format); - - if (elements[i].src_offset > 65535) { - r600_bytecode_clear(&bc); - R600_ERR("too big src_offset: %u\n", elements[i].src_offset); - return NULL; - } - - memset(&vtx, 0, sizeof(vtx)); - vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start; - vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA; - vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; - vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; - vtx.mega_fetch_count = 0x1F; - vtx.dst_gpr = i + 1; - vtx.dst_sel_x = desc->swizzle[0]; - vtx.dst_sel_y = desc->swizzle[1]; - vtx.dst_sel_z = desc->swizzle[2]; - vtx.dst_sel_w = desc->swizzle[3]; - vtx.data_format = format; - vtx.num_format_all = num_format; - vtx.format_comp_all = format_comp; - vtx.offset = elements[i].src_offset; - vtx.endian = endian; - - if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { - r600_bytecode_clear(&bc); - return NULL; - } - } - - r600_bytecode_add_cfinst(&bc, CF_OP_RET); - - if ((r = r600_bytecode_build(&bc))) { - r600_bytecode_clear(&bc); - return NULL; - } - - if (rctx->screen->b.debug_flags & DBG_FS) { - fprintf(stderr, "--------------------------------------------------------------\n"); - fprintf(stderr, "Vertex elements state:\n"); - for (i = 0; i < count; i++) { - fprintf(stderr, " "); - util_dump_vertex_element(stderr, elements+i); - fprintf(stderr, "\n"); - } - - r600_bytecode_disasm(&bc); - } - - fs_size = bc.ndw*4; - - /* Allocate the CSO. */ - shader = CALLOC_STRUCT(r600_fetch_shader); - if (!shader) { - r600_bytecode_clear(&bc); - return NULL; - } - memcpy(shader->strides, strides, sizeof(strides)); - shader->buffer_mask = buffer_mask; - - u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256, - &shader->offset, - (struct pipe_resource**)&shader->buffer); - if (!shader->buffer) { - r600_bytecode_clear(&bc); - FREE(shader); - return NULL; - } - - bytecode = r600_buffer_map_sync_with_rings - (&rctx->b, shader->buffer, - PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY); - bytecode += shader->offset / 4; - - if (UTIL_ARCH_BIG_ENDIAN) { - for (i = 0; i < fs_size / 4; ++i) { - bytecode[i] = util_cpu_to_le32(bc.bytecode[i]); - } - } else { - memcpy(bytecode, bc.bytecode, fs_size); - } - rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf); - - r600_bytecode_clear(&bc); - return shader; -} - void r600_bytecode_alu_read(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1) { diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index fd38782da25..e0bcb8f91d4 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -332,10 +332,6 @@ int r600_load_ar(struct r600_bytecode *bc, bool for_src); int cm_bytecode_add_cf_end(struct r600_bytecode *bc); -void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements); - /* r700_asm.c */ void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 44d6506b573..31b69873a0d 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -40,7 +40,9 @@ #include "nir/tgsi_to_nir.h" #include "nir/nir_to_tgsi_info.h" #include "compiler/nir/nir.h" +#include "util/macros.h" #include "util/u_bitcast.h" +#include "util/u_dump.h" #include "util/u_endian.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -369,6 +371,164 @@ struct r600_shader_ctx { unsigned enabled_stream_buffers_mask; }; +void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_bytecode bc; + struct r600_bytecode_vtx vtx; + const struct util_format_description *desc; + unsigned fetch_resource_start = rctx->b.gfx_level >= EVERGREEN ? 0 : 160; + unsigned format, num_format, format_comp, endian; + uint32_t *bytecode; + int i, j, r, fs_size; + uint32_t buffer_mask = 0; + struct r600_fetch_shader *shader; + unsigned strides[PIPE_MAX_ATTRIBS]; + + assert(count < 32); + + memset(&bc, 0, sizeof(bc)); + r600_bytecode_init(&bc, rctx->b.gfx_level, rctx->b.family, + rctx->screen->has_compressed_msaa_texturing); + + bc.isa = rctx->isa; + + for (i = 0; i < count; i++) { + if (elements[i].instance_divisor > 1) { + if (rctx->b.gfx_level == CAYMAN) { + for (j = 0; j < 4; j++) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_MULHI_UINT; + alu.src[0].sel = 0; + alu.src[0].chan = 3; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; + alu.dst.sel = i + 1; + alu.dst.chan = j; + alu.dst.write = j == 3; + alu.last = j == 3; + if ((r = r600_bytecode_add_alu(&bc, &alu))) { + r600_bytecode_clear(&bc); + return NULL; + } + } + } else { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_MULHI_UINT; + alu.src[0].sel = 0; + alu.src[0].chan = 3; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1; + alu.dst.sel = i + 1; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + if ((r = r600_bytecode_add_alu(&bc, &alu))) { + r600_bytecode_clear(&bc); + return NULL; + } + } + } + strides[elements[i].vertex_buffer_index] = elements[i].src_stride; + buffer_mask |= BITFIELD_BIT(elements[i].vertex_buffer_index); + } + + for (i = 0; i < count; i++) { + r600_vertex_data_type(elements[i].src_format, + &format, &num_format, &format_comp, &endian); + + desc = util_format_description(elements[i].src_format); + + if (elements[i].src_offset > 65535) { + r600_bytecode_clear(&bc); + R600_ERR("too big src_offset: %u\n", elements[i].src_offset); + return NULL; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start; + vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA; + vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0; + vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0; + vtx.mega_fetch_count = 0x1F; + vtx.dst_gpr = i + 1; + vtx.dst_sel_x = desc->swizzle[0]; + vtx.dst_sel_y = desc->swizzle[1]; + vtx.dst_sel_z = desc->swizzle[2]; + vtx.dst_sel_w = desc->swizzle[3]; + vtx.data_format = format; + vtx.num_format_all = num_format; + vtx.format_comp_all = format_comp; + vtx.offset = elements[i].src_offset; + vtx.endian = endian; + + if ((r = r600_bytecode_add_vtx(&bc, &vtx))) { + r600_bytecode_clear(&bc); + return NULL; + } + } + + r600_bytecode_add_cfinst(&bc, CF_OP_RET); + + if ((r = r600_bytecode_build(&bc))) { + r600_bytecode_clear(&bc); + return NULL; + } + + if (rctx->screen->b.debug_flags & DBG_FS) { + fprintf(stderr, "--------------------------------------------------------------\n"); + fprintf(stderr, "Vertex elements state:\n"); + for (i = 0; i < count; i++) { + fprintf(stderr, " "); + util_dump_vertex_element(stderr, elements+i); + fprintf(stderr, "\n"); + } + + r600_bytecode_disasm(&bc); + } + + fs_size = bc.ndw*4; + + /* Allocate the CSO. */ + shader = CALLOC_STRUCT(r600_fetch_shader); + if (!shader) { + r600_bytecode_clear(&bc); + return NULL; + } + memcpy(shader->strides, strides, sizeof(strides)); + shader->buffer_mask = buffer_mask; + + u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256, + &shader->offset, + (struct pipe_resource**)&shader->buffer); + if (!shader->buffer) { + r600_bytecode_clear(&bc); + FREE(shader); + return NULL; + } + + bytecode = r600_buffer_map_sync_with_rings + (&rctx->b, shader->buffer, + PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY); + bytecode += shader->offset / 4; + + if (UTIL_ARCH_BIG_ENDIAN) { + for (i = 0; i < fs_size / 4; ++i) { + bytecode[i] = util_cpu_to_le32(bc.bytecode[i]); + } + } else { + memcpy(bytecode, bc.bytecode, fs_size); + } + rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf); + + r600_bytecode_clear(&bc); + return shader; +} + int eg_get_interpolator_index(unsigned interpolate, unsigned location) { if (interpolate == TGSI_INTERPOLATE_COLOR || diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 75139b87da3..71c94ec7feb 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -187,6 +187,10 @@ struct r600_pipe_shader { unsigned scratch_space_needed; /* size of scratch space (if > 0) counted in vec4 */ }; +void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements); + /* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and TGSI_INTERPOLATE_LOC_CENTER/SAMPLE/COUNT. Other input values return -1. */ int eg_get_interpolator_index(unsigned interpolate, unsigned location);