From: Marek Olšák <marek.ol...@amd.com>

LLVM 3.8:
- had broken indirect resource indexing
- didn't have scratch coalescing
- was the last user of problematic v16i8
- only supported OpenGL 4.1

This leaves us with LLVM 3.9 and LLVM 4.0 support for Mesa 17.2. --- configure.ac | 4 +- src/amd/common/ac_llvm_build.c | 179 ++++++--------------- src/amd/common/ac_llvm_util.c | 7 - src/gallium/drivers/radeon/r600_pipe_common.c | 10 +- src/gallium/drivers/radeonsi/si_pipe.c | 21 +-- src/gallium/drivers/radeonsi/si_shader.c | 42 ++--- src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 6 +- .../drivers/radeonsi/si_shader_tgsi_setup.c | 30 +--- 8 files changed, 81 insertions(+), 218 deletions(-) diff --git a/configure.ac b/configure.ac index ba04279..a614458 100644 --- a/configure.ac +++ b/configure.ac @@ -95,22 +95,22 @@ XCBGLX_REQUIRED=1.8.1 XDAMAGE_REQUIRED=1.1 XSHMFENCE_REQUIRED=1.1 XVMC_REQUIRED=1.0.6 PYTHON_MAKO_REQUIRED=0.8.0 LIBSENSORS_REQUIRED=4.0.0 ZLIB_REQUIRED=1.2.8 dnl LLVM versions LLVM_REQUIRED_GALLIUM=3.3.0 LLVM_REQUIRED_OPENCL=3.6.0 -LLVM_REQUIRED_R600=3.8.0 -LLVM_REQUIRED_RADEONSI=3.8.0 +LLVM_REQUIRED_R600=3.9.0 +LLVM_REQUIRED_RADEONSI=3.9.0 LLVM_REQUIRED_RADV=3.9.0 LLVM_REQUIRED_SWR=3.9.0 dnl Check for progs AC_PROG_CPP AC_PROG_CC AC_PROG_CXX AM_PROG_CC_C_O AM_PROG_AS AX_CHECK_GNU_MAKE diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 171016b..ba92e7e 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -226,56 +226,30 @@ struct cube_selection_coords { LLVMValueRef stc[2]; LLVMValueRef ma; LLVMValueRef id; }; static void build_cube_intrinsic(struct ac_llvm_context *ctx, LLVMValueRef in[3], struct cube_selection_coords *out) { - LLVMBuilderRef builder = ctx->builder; - - if (HAVE_LLVM >= 0x0309) { - LLVMTypeRef f32 = ctx->f32; - - out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", - f32, in, 3, AC_FUNC_ATTR_READNONE); - out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", - f32, in, 3, AC_FUNC_ATTR_READNONE); - out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", - f32, in, 3, AC_FUNC_ATTR_READNONE); - out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", - f32, 
in, 3, AC_FUNC_ATTR_READNONE); - } else { - LLVMValueRef c[4] = { - in[0], - in[1], - in[2], - LLVMGetUndef(LLVMTypeOf(in[0])) - }; - LLVMValueRef vec = ac_build_gather_values(ctx, c, 4); - - LLVMValueRef tmp = - ac_build_intrinsic(ctx, "llvm.AMDGPU.cube", - LLVMTypeOf(vec), &vec, 1, - AC_FUNC_ATTR_READNONE); - - out->stc[1] = LLVMBuildExtractElement(builder, tmp, - LLVMConstInt(ctx->i32, 0, 0), ""); - out->stc[0] = LLVMBuildExtractElement(builder, tmp, - LLVMConstInt(ctx->i32, 1, 0), ""); - out->ma = LLVMBuildExtractElement(builder, tmp, - LLVMConstInt(ctx->i32, 2, 0), ""); - out->id = LLVMBuildExtractElement(builder, tmp, - LLVMConstInt(ctx->i32, 3, 0), ""); - } + LLVMTypeRef f32 = ctx->f32; + + out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", + f32, in, 3, AC_FUNC_ATTR_READNONE); + out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", + f32, in, 3, AC_FUNC_ATTR_READNONE); + out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", + f32, in, 3, AC_FUNC_ATTR_READNONE); + out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", + f32, in, 3, AC_FUNC_ATTR_READNONE); } /** * Build a manual selection sequence for cube face sc/tc coordinates and * major axis vector (multiplied by 2 for consistency) for the given * vec3 \p coords, for the face implied by \p selcoords. * * For the major axis, we always adjust the sign to be in the direction of * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards * the selcoords major axis. @@ -551,21 +525,21 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, unsigned num_channels, LLVMValueRef voffset, LLVMValueRef soffset, unsigned inst_offset, bool glc, bool slc, bool writeonly_memory, bool has_add_tid) { /* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */ - if (HAVE_LLVM >= 0x0309 && !has_add_tid) { + if (!has_add_tid) { /* Split 3 channel stores, becase LLVM doesn't support 3-channel * intrinsics. 
*/ if (num_channels == 3) { LLVMValueRef v[3], v01; for (int i = 0; i < 3; i++) { v[i] = LLVMBuildExtractElement(ctx->builder, vdata, LLVMConstInt(ctx->i32, i, 0), ""); } v01 = ac_build_gather_values(ctx, v, 2); @@ -656,124 +630,77 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, unsigned inst_offset, unsigned glc, unsigned slc, bool readonly_memory) { unsigned func = CLAMP(num_channels, 1, 3) - 1; - if (HAVE_LLVM >= 0x309) { - LLVMValueRef args[] = { - LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), - vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0), - LLVMConstInt(ctx->i32, inst_offset, 0), - LLVMConstInt(ctx->i1, glc, 0), - LLVMConstInt(ctx->i1, slc, 0) - }; - - LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2), - ctx->v4f32}; - const char *type_names[] = {"f32", "v2f32", "v4f32"}; - char name[256]; - - if (voffset) { - args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset, - ""); - } - - if (soffset) { - args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset, - ""); - } - - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", - type_names[func]); - - return ac_build_intrinsic(ctx, name, types[func], args, - ARRAY_SIZE(args), - /* READNONE means writes can't - * affect it, while READONLY means - * that writes can affect it. */ - readonly_memory && HAVE_LLVM >= 0x0400 ? - AC_FUNC_ATTR_READNONE : - AC_FUNC_ATTR_READONLY); - } else { - LLVMValueRef args[] = { - LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""), - voffset ? voffset : vindex, - soffset, - LLVMConstInt(ctx->i32, inst_offset, 0), - LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen - LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen - LLVMConstInt(ctx->i32, glc, 0), - LLVMConstInt(ctx->i32, slc, 0), - LLVMConstInt(ctx->i32, 0, 0), // TFE - }; + LLVMValueRef args[] = { + LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), + vindex ? 
vindex : LLVMConstInt(ctx->i32, 0, 0), + LLVMConstInt(ctx->i32, inst_offset, 0), + LLVMConstInt(ctx->i1, glc, 0), + LLVMConstInt(ctx->i1, slc, 0) + }; - LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2), - ctx->v4i32}; - const char *type_names[] = {"i32", "v2i32", "v4i32"}; - const char *arg_type = "i32"; - char name[256]; + LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2), + ctx->v4f32}; + const char *type_names[] = {"f32", "v2f32", "v4f32"}; + char name[256]; - if (voffset && vindex) { - LLVMValueRef vaddr[] = {vindex, voffset}; + if (voffset) { + args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset, + ""); + } - arg_type = "v2i32"; - args[1] = ac_build_gather_values(ctx, vaddr, 2); - } + if (soffset) { + args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset, + ""); + } - snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s", - type_names[func], arg_type); + snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", + type_names[func]); - return ac_build_intrinsic(ctx, name, types[func], args, - ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY); - } + return ac_build_intrinsic(ctx, name, types[func], args, + ARRAY_SIZE(args), + /* READNONE means writes can't affect it, while + * READONLY means that writes can affect it. */ + readonly_memory && HAVE_LLVM >= 0x0400 ? + AC_FUNC_ATTR_READNONE : + AC_FUNC_ATTR_READONLY); } LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vindex, LLVMValueRef voffset, bool readonly_memory) { - if (HAVE_LLVM >= 0x0309) { - LLVMValueRef args [] = { - LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), - vindex, - voffset, - LLVMConstInt(ctx->i1, 0, 0), /* glc */ - LLVMConstInt(ctx->i1, 0, 0), /* slc */ - }; - - return ac_build_intrinsic(ctx, - "llvm.amdgcn.buffer.load.format.v4f32", - ctx->v4f32, args, ARRAY_SIZE(args), - /* READNONE means writes can't - * affect it, while READONLY means - * that writes can affect it. 
*/ - readonly_memory && HAVE_LLVM >= 0x0400 ? - AC_FUNC_ATTR_READNONE : - AC_FUNC_ATTR_READONLY); - } - - LLVMValueRef args[] = { - LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""), - voffset, + LLVMValueRef args [] = { + LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), vindex, + voffset, + LLVMConstInt(ctx->i1, 0, 0), /* glc */ + LLVMConstInt(ctx->i1, 0, 0), /* slc */ }; - return ac_build_intrinsic(ctx, "llvm.SI.vs.load.input", - ctx->v4f32, args, 3, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_LEGACY); + + return ac_build_intrinsic(ctx, + "llvm.amdgcn.buffer.load.format.v4f32", + ctx->v4f32, args, ARRAY_SIZE(args), + /* READNONE means writes can't affect it, while + * READONLY means that writes can affect it. */ + readonly_memory && HAVE_LLVM >= 0x0400 ? + AC_FUNC_ATTR_READNONE : + AC_FUNC_ATTR_READONLY); } /** * Set range metadata on an instruction. This can only be used on load and * call instructions. If you know an instruction can only produce the values * 0, 1, 2, you would do set_range_metadata(value, 0, 3); * \p lo is the minimum value inclusive. * \p hi is the maximum value exclusive. 
*/ static void set_range_metadata(struct ac_llvm_context *ctx, diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index e2d5de2..8827975 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -98,31 +98,24 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family) case CHIP_TONGA: return "tonga"; case CHIP_ICELAND: return "iceland"; case CHIP_CARRIZO: return "carrizo"; case CHIP_FIJI: return "fiji"; case CHIP_STONEY: return "stoney"; -#if HAVE_LLVM == 0x0308 - case CHIP_POLARIS10: - return "tonga"; - case CHIP_POLARIS11: - return "tonga"; -#else case CHIP_POLARIS10: return "polaris10"; case CHIP_POLARIS11: return "polaris11"; -#endif default: return ""; } } LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill) { assert(family >= CHIP_TAHITI); const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index ce84139..19b8341 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -994,24 +994,24 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) case CHIP_MULLINS: return "mullins"; case CHIP_TONGA: return "tonga"; case CHIP_ICELAND: return "iceland"; case CHIP_CARRIZO: return "carrizo"; case CHIP_FIJI: return "fiji"; case CHIP_STONEY: return "stoney"; case CHIP_POLARIS10: - return HAVE_LLVM >= 0x0309 ? "polaris10" : "carrizo"; + return "polaris10"; case CHIP_POLARIS11: case CHIP_POLARIS12: /* same as polaris11 */ - return HAVE_LLVM >= 0x0309 ? 
"polaris11" : "carrizo"; + return "polaris11"; case CHIP_VEGA10: return "gfx900"; default: return ""; } } static int r600_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir ir_type, enum pipe_compute_cap param, @@ -1059,37 +1059,37 @@ static int r600_get_compute_param(struct pipe_screen *screen, uint64_t *grid_size = ret; grid_size[0] = 65535; grid_size[1] = 65535; grid_size[2] = 65535; } return 3 * sizeof(uint64_t) ; case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: if (ret) { uint64_t *block_size = ret; - if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 && + if (rscreen->chip_class >= SI && ir_type == PIPE_SHADER_IR_TGSI) { block_size[0] = 2048; block_size[1] = 2048; block_size[2] = 2048; } else { block_size[0] = 256; block_size[1] = 256; block_size[2] = 256; } } return 3 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: if (ret) { uint64_t *max_threads_per_block = ret; - if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 && + if (rscreen->chip_class >= SI && ir_type == PIPE_SHADER_IR_TGSI) *max_threads_per_block = 2048; else *max_threads_per_block = 256; } return sizeof(uint64_t); case PIPE_COMPUTE_CAP_ADDRESS_BITS: if (ret) { uint32_t *address_bits = ret; address_bits[0] = 32; @@ -1167,21 +1167,21 @@ static int r600_get_compute_param(struct pipe_screen *screen, break; /* unused */ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: if (ret) { uint32_t *subgroup_size = ret; *subgroup_size = r600_wavefront_size(rscreen->family); } return sizeof(uint32_t); case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: if (ret) { uint64_t *max_variable_threads_per_block = ret; - if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 && + if (rscreen->chip_class >= SI && ir_type == PIPE_SHADER_IR_TGSI) *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK; else *max_variable_threads_per_block = 0; } return sizeof(uint64_t); } fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); return 0; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c index 47d170a..0d1721f 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -320,22 +320,21 @@ fail: return NULL; } /* * pipe_screen */ static bool si_have_tgsi_compute(struct si_screen *sscreen) { /* Old kernels disallowed some register writes for SI * that are used for indirect dispatches. */ - return HAVE_LLVM >= 0x309 && - (sscreen->b.chip_class >= CIK || + return (sscreen->b.chip_class >= CIK || sscreen->b.info.drm_major == 3 || (sscreen->b.info.drm_major == 2 && sscreen->b.info.drm_minor >= 45)); } static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { struct si_screen *sscreen = (struct si_screen *)pscreen; switch (param) { @@ -415,26 +414,24 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_CLEAR_TEXTURE: case PIPE_CAP_CULL_DISTANCE: case PIPE_CAP_TGSI_ARRAY_COMPONENTS: case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: case PIPE_CAP_DOUBLES: case PIPE_CAP_TGSI_TEX_TXF_LZ: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: - return 1; - case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_TGSI_CLOCK: - return HAVE_LLVM >= 0x0309; + return 1; case PIPE_CAP_TGSI_VOTE: return HAVE_LLVM >= 0x0400; case PIPE_CAP_TGSI_BALLOT: return HAVE_LLVM >= 0x0500; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr; @@ -451,29 +448,27 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) sscreen->b.info.drm_major == 3; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return R600_MAP_BUFFER_ALIGNMENT; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: case PIPE_CAP_MAX_VERTEX_STREAMS: - return 4; - case 
PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: - return HAVE_LLVM >= 0x0309 ? 4 : 0; + return 4; case PIPE_CAP_GLSL_FEATURE_LEVEL: if (si_have_tgsi_compute(sscreen)) return 450; - return HAVE_LLVM >= 0x0309 ? 420 : 410; + return 420; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: return MIN2(sscreen->b.info.max_alloc_size, INT_MAX); case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: /* SI doesn't support unaligned loads. * CIK needs DRM 2.50.0 on radeon. */ return sscreen->b.chip_class == SI || @@ -649,23 +644,23 @@ static int si_get_shader_param(struct pipe_screen* pscreen, case PIPE_SHADER_CAP_MAX_TEMPS: return 256; /* Max native temporaries. */ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: return 4096 * sizeof(float[4]); /* actually only memory limits this */ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return SI_NUM_CONST_BUFFERS; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return SI_NUM_SAMPLERS; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: - return HAVE_LLVM >= 0x0309 ? SI_NUM_SHADER_BUFFERS : 0; + return SI_NUM_SHADER_BUFFERS; case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: - return HAVE_LLVM >= 0x0309 ? SI_NUM_IMAGES : 0; + return SI_NUM_IMAGES; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: return 3; /* Supported boolean features. 
*/ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: @@ -880,23 +875,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) (sscreen->b.chip_class == VI && sscreen->b.info.pfp_fw_version >= 121 && sscreen->b.info.me_fw_version >= 87) || (sscreen->b.chip_class == CIK && sscreen->b.info.pfp_fw_version >= 211 && sscreen->b.info.me_fw_version >= 173) || (sscreen->b.chip_class == SI && sscreen->b.info.pfp_fw_version >= 121 && sscreen->b.info.me_fw_version >= 87); - sscreen->has_ds_bpermute = HAVE_LLVM >= 0x0309 && - sscreen->b.chip_class >= VI; - + sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI; sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 && sscreen->b.family <= CHIP_POLARIS12) || sscreen->b.family == CHIP_VEGA10; sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3ac1ef4..427afd5 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -585,27 +585,20 @@ static LLVMValueRef get_indirect_index(struct si_shader_context *ctx, /** * Like get_indirect_index, but restricts the return value to a (possibly * undefined) value inside [0..num). */ static LLVMValueRef get_bounded_indirect_index(struct si_shader_context *ctx, const struct tgsi_ind_register *ind, int rel_index, unsigned num) { LLVMValueRef result = get_indirect_index(ctx, ind, rel_index); - /* LLVM 3.8: If indirect resource indexing is used: - * - SI & CIK hang - * - VI crashes - */ - if (HAVE_LLVM == 0x0308) - return LLVMGetUndef(ctx->i32); - return si_llvm_bound_index(ctx, result, num); } /** * Calculate a dword address given an input or output register and a stride. 
*/ static LLVMValueRef get_dw_address(struct si_shader_context *ctx, const struct tgsi_full_dst_register *dst, const struct tgsi_full_src_register *src, @@ -1631,31 +1624,26 @@ static void declare_system_value(struct si_shader_context *ctx, } value = lp_build_gather_values(gallivm, values, 3); break; } case TGSI_SEMANTIC_THREAD_ID: value = LLVMGetParam(ctx->main_fn, ctx->param_thread_id); break; case TGSI_SEMANTIC_HELPER_INVOCATION: - if (HAVE_LLVM >= 0x0309) { - value = lp_build_intrinsic(gallivm->builder, - "llvm.amdgcn.ps.live", - ctx->i1, NULL, 0, - LP_FUNC_ATTR_READNONE); - value = LLVMBuildNot(gallivm->builder, value, ""); - value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, ""); - } else { - assert(!"TGSI_SEMANTIC_HELPER_INVOCATION unsupported"); - return; - } + value = lp_build_intrinsic(gallivm->builder, + "llvm.amdgcn.ps.live", + ctx->i1, NULL, 0, + LP_FUNC_ATTR_READNONE); + value = LLVMBuildNot(gallivm->builder, value, ""); + value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, ""); break; case TGSI_SEMANTIC_SUBGROUP_SIZE: value = LLVMConstInt(ctx->i32, 64, 0); break; case TGSI_SEMANTIC_SUBGROUP_INVOCATION: value = ac_get_thread_id(&ctx->ac); break; @@ -4253,26 +4241,24 @@ static void atomic_emit_memory(struct si_shader_context *ctx, arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0); arg = LLVMBuildBitCast(builder, arg, ctx->i32, ""); if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { LLVMValueRef new_data; new_data = lp_build_emit_fetch(&ctx->bld_base, inst, 3, 0); new_data = LLVMBuildBitCast(builder, new_data, ctx->i32, ""); -#if HAVE_LLVM >= 0x309 result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data, LLVMAtomicOrderingSequentiallyConsistent, LLVMAtomicOrderingSequentiallyConsistent, false); -#endif result = LLVMBuildExtractValue(builder, result, 0, ""); } else { LLVMAtomicRMWBinOp op; switch(inst->Instruction.Opcode) { case TGSI_OPCODE_ATOMUADD: op = LLVMAtomicRMWBinOpAdd; break; case TGSI_OPCODE_ATOMXCHG: @@ -5659,30 +5645,28 
@@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = &ctx->gallivm; /* SI only (thanks to a hw bug workaround): * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (HAVE_LLVM >= 0x0309 && - ctx->screen->b.chip_class == SI && + if (ctx->screen->b.chip_class == SI && ctx->type == PIPE_SHADER_TESS_CTRL) { emit_waitcnt(ctx, LGKM_CNT & VM_CNT); return; } lp_build_intrinsic(gallivm->builder, - HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier" - : "llvm.AMDGPU.barrier.local", + "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 0, LP_FUNC_ATTR_CONVERGENT); } static const struct lp_build_tgsi_action tex_action = { .fetch_args = tex_fetch_args, .emit = build_tex_intrinsic, }; static const struct lp_build_tgsi_action interp_action = { .fetch_args = interp_fetch_args, @@ -6489,28 +6473,22 @@ void si_shader_binary_read_config(struct ac_shader_binary *binary, void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, struct si_shader_config *config, uint64_t scratch_va) { unsigned i; uint32_t scratch_rsrc_dword0 = scratch_va; uint32_t scratch_rsrc_dword1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32); - /* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE - * correctly. - */ - if (HAVE_LLVM >= 0x0309) - scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1); - else - scratch_rsrc_dword1 |= - S_008F04_STRIDE(config->scratch_bytes_per_wave / 64); + /* Enable scratch coalescing. 
*/ + scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1); for (i = 0 ; i < shader->binary.reloc_count; i++) { const struct ac_shader_reloc *reloc = &shader->binary.relocs[i]; if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) { util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset, &scratch_rsrc_dword0, 4); } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset, &scratch_rsrc_dword1, 4); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 1e2d75d..9fa56c7 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -694,22 +694,21 @@ static void emit_fdiv(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); emit_data->output[emit_data->chan] = LLVMBuildFDiv(ctx->gallivm.builder, emit_data->args[0], emit_data->args[1], ""); /* Use v_rcp_f32 instead of precise division. */ - if (HAVE_LLVM >= 0x0309 && - !LLVMIsConstant(emit_data->output[emit_data->chan])) + if (!LLVMIsConstant(emit_data->output[emit_data->chan])) LLVMSetMetadata(emit_data->output[emit_data->chan], ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp); } /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in * the target machine. f64 needs global unsafe math flags to get rsq. 
*/ static void emit_rsq(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { @@ -741,22 +740,21 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64"; bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac; bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv; bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg; bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp; bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp; bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp; bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp; bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = - HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.rsq.f64" : "llvm.AMDGPU.rsq.f64"; + bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64"; bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64"; bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32"; bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32"; bld_base->op_actions[TGSI_OPCODE_FMA].emit = bld_base->op_actions[TGSI_OPCODE_MAD].emit; bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac; bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 66b1916..a2c7e67 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -43,34 +43,20 @@ #include <llvm-c/Support.h> /* 
Data for if/else/endif and bgnloop/endloop control flow structures. */ struct si_llvm_flow { /* Loop exit or next part of if/else/endif. */ LLVMBasicBlockRef next_block; LLVMBasicBlockRef loop_entry_block; }; -#define CPU_STRING_LEN 30 -#define FS_STRING_LEN 30 -#define TRIPLE_STRING_LEN 7 - -/** - * Shader types for the LLVM backend. - */ -enum si_llvm_shader_type { - RADEON_LLVM_SHADER_PS = 0, - RADEON_LLVM_SHADER_VS = 1, - RADEON_LLVM_SHADER_GS = 2, - RADEON_LLVM_SHADER_CS = 3, -}; - enum si_llvm_calling_convention { RADEON_LLVM_AMDGPU_VS = 87, RADEON_LLVM_AMDGPU_GS = 88, RADEON_LLVM_AMDGPU_PS = 89, RADEON_LLVM_AMDGPU_CS = 90, }; void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value) { char str[16]; @@ -79,50 +65,42 @@ void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value) LLVMAddTargetDependentFunctionAttr(F, name, str); } /** * Set the shader type we want to compile * * @param type shader type to set */ void si_llvm_shader_type(LLVMValueRef F, unsigned type) { - enum si_llvm_shader_type llvm_type; enum si_llvm_calling_convention calling_conv; switch (type) { case PIPE_SHADER_VERTEX: case PIPE_SHADER_TESS_CTRL: case PIPE_SHADER_TESS_EVAL: - llvm_type = RADEON_LLVM_SHADER_VS; calling_conv = RADEON_LLVM_AMDGPU_VS; break; case PIPE_SHADER_GEOMETRY: - llvm_type = RADEON_LLVM_SHADER_GS; calling_conv = RADEON_LLVM_AMDGPU_GS; break; case PIPE_SHADER_FRAGMENT: - llvm_type = RADEON_LLVM_SHADER_PS; calling_conv = RADEON_LLVM_AMDGPU_PS; break; case PIPE_SHADER_COMPUTE: - llvm_type = RADEON_LLVM_SHADER_CS; calling_conv = RADEON_LLVM_AMDGPU_CS; break; default: unreachable("Unhandle shader type"); } - if (HAVE_LLVM >= 0x309) - LLVMSetFunctionCallConv(F, calling_conv); - else - si_llvm_add_attribute(F, "ShaderType", llvm_type); + LLVMSetFunctionCallConv(F, calling_conv); } static void init_amdgpu_target() { gallivm_init_llvm_targets(); LLVMInitializeAMDGPUTargetInfo(); LLVMInitializeAMDGPUTarget(); LLVMInitializeAMDGPUTargetMC(); 
LLVMInitializeAMDGPUAsmPrinter(); @@ -841,24 +819,22 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base, * * Note that 16 is the number of vector elements that * LLVM will store in a register, so theoretically an * array with up to 4 * 16 = 64 elements could be * handled this way, but whether that's a good idea * depends on VGPR register pressure elsewhere. * * FIXME: We shouldn't need to have the non-alloca * code path for arrays. LLVM should be smart enough to * promote allocas into registers when profitable. - * - * LLVM 3.8 crashes with this. */ - if ((HAVE_LLVM >= 0x0309 && array_size > 16) || + if (array_size > 16 || /* TODO: VGPR indexing is buggy on GFX9. */ ctx->screen->b.chip_class == GFX9) { array_alloca = LLVMBuildAlloca(builder, LLVMArrayType(ctx->f32, array_size), "array"); ctx->temp_array_allocas[id] = array_alloca; } } if (!ctx->temps_count) { @@ -1267,27 +1243,25 @@ void si_llvm_context_init(struct si_shader_context *ctx, */ memset(ctx, 0, sizeof(*ctx)); ctx->screen = sscreen; ctx->tm = tm; ctx->gallivm.context = LLVMContextCreate(); ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi", ctx->gallivm.context); LLVMSetTarget(ctx->gallivm.module, "amdgcn--"); -#if HAVE_LLVM >= 0x0309 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); LLVMSetDataLayout(ctx->gallivm.module, data_layout_str); LLVMDisposeTargetData(data_layout); LLVMDisposeMessage(data_layout_str); -#endif bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0; enum lp_float_mode float_mode = unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH : LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH; ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context, float_mode); ac_llvm_context_init(&ctx->ac, ctx->gallivm.context); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev