Thanks! Reviewed-by: Marek Olšák <marek.ol...@amd.com>
Marek On Fri, Feb 3, 2017 at 1:05 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > Suggested by Marek. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/amd/Makefile.sources | 2 + > src/amd/common/ac_llvm_build.c | 752 > ++++++++++++++++++++++ > src/amd/common/ac_llvm_build.h | 177 +++++ > src/amd/common/ac_llvm_util.c | 717 +-------------------- > src/amd/common/ac_llvm_util.h | 135 ---- > src/amd/common/ac_nir_to_llvm.c | 1 + > src/gallium/drivers/radeonsi/si_shader_internal.h | 1 + > 7 files changed, 934 insertions(+), 851 deletions(-) > create mode 100644 src/amd/common/ac_llvm_build.c > create mode 100644 src/amd/common/ac_llvm_build.h > > diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources > index d981453..7aaa90a 100644 > --- a/src/amd/Makefile.sources > +++ b/src/amd/Makefile.sources > @@ -29,6 +29,8 @@ ADDRLIB_FILES = \ > AMD_COMPILER_FILES = \ > common/ac_binary.c \ > common/ac_binary.h \ > + common/ac_llvm_build.c \ > + common/ac_llvm_build.h \ > common/ac_llvm_helper.cpp \ > common/ac_llvm_util.c \ > common/ac_llvm_util.h > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > new file mode 100644 > index 0000000..afcbf31 > --- /dev/null > +++ b/src/amd/common/ac_llvm_build.c > @@ -0,0 +1,752 @@ > +/* > + * Copyright 2014 Advanced Micro Devices, Inc. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY > CLAIM, > + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE > + * USE OR OTHER DEALINGS IN THE SOFTWARE. > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portions > + * of the Software. > + * > + */ > +/* based on pieces from si_pipe.c and radeon_llvm_emit.c */ > +#include "ac_llvm_build.h" > + > +#include <llvm-c/Core.h> > + > +#include "c11/threads.h" > + > +#include <assert.h> > +#include <stdio.h> > + > +#include "ac_llvm_util.h" > + > +#include "util/bitscan.h" > +#include "util/macros.h" > +#include "sid.h" > + > +/* Initialize module-independent parts of the context. > + * > + * The caller is responsible for initializing ctx::module and ctx::builder. 
> + */ > +void > +ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context) > +{ > + LLVMValueRef args[1]; > + > + ctx->context = context; > + ctx->module = NULL; > + ctx->builder = NULL; > + > + ctx->voidt = LLVMVoidTypeInContext(ctx->context); > + ctx->i1 = LLVMInt1TypeInContext(ctx->context); > + ctx->i8 = LLVMInt8TypeInContext(ctx->context); > + ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); > + ctx->f32 = LLVMFloatTypeInContext(ctx->context); > + ctx->v4i32 = LLVMVectorType(ctx->i32, 4); > + ctx->v4f32 = LLVMVectorType(ctx->f32, 4); > + ctx->v16i8 = LLVMVectorType(ctx->i8, 16); > + > + ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, > + "range", 5); > + > + ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, > + > "invariant.load", 14); > + > + ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, > "fpmath", 6); > + > + args[0] = LLVMConstReal(ctx->f32, 2.5); > + ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1); > + > + ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, > + "amdgpu.uniform", 14); > + > + ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); > +} > + > +LLVMValueRef > +ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, > + LLVMTypeRef return_type, LLVMValueRef *params, > + unsigned param_count, unsigned attrib_mask) > +{ > + LLVMValueRef function; > + > + function = LLVMGetNamedFunction(ctx->module, name); > + if (!function) { > + LLVMTypeRef param_types[32], function_type; > + unsigned i; > + > + assert(param_count <= 32); > + > + for (i = 0; i < param_count; ++i) { > + assert(params[i]); > + param_types[i] = LLVMTypeOf(params[i]); > + } > + function_type = > + LLVMFunctionType(return_type, param_types, param_count, > 0); > + function = LLVMAddFunction(ctx->module, name, function_type); > + > + LLVMSetFunctionCallConv(function, LLVMCCallConv); > + LLVMSetLinkage(function, LLVMExternalLinkage); > + > + attrib_mask |= 
AC_FUNC_ATTR_NOUNWIND; > + while (attrib_mask) { > + enum ac_func_attr attr = 1u << > u_bit_scan(&attrib_mask); > + ac_add_function_attr(function, -1, attr); > + } > + } > + return LLVMBuildCall(ctx->builder, function, params, param_count, ""); > +} > + > +LLVMValueRef > +ac_build_gather_values_extended(struct ac_llvm_context *ctx, > + LLVMValueRef *values, > + unsigned value_count, > + unsigned value_stride, > + bool load) > +{ > + LLVMBuilderRef builder = ctx->builder; > + LLVMValueRef vec; > + unsigned i; > + > + > + if (value_count == 1) { > + if (load) > + return LLVMBuildLoad(builder, values[0], ""); > + return values[0]; > + } else if (!value_count) > + unreachable("value_count is 0"); > + > + for (i = 0; i < value_count; i++) { > + LLVMValueRef value = values[i * value_stride]; > + if (load) > + value = LLVMBuildLoad(builder, value, ""); > + > + if (!i) > + vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), > value_count)); > + LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); > + vec = LLVMBuildInsertElement(builder, vec, value, index, ""); > + } > + return vec; > +} > + > +LLVMValueRef > +ac_build_gather_values(struct ac_llvm_context *ctx, > + LLVMValueRef *values, > + unsigned value_count) > +{ > + return ac_build_gather_values_extended(ctx, values, value_count, 1, > false); > +} > + > +LLVMValueRef > +ac_emit_fdiv(struct ac_llvm_context *ctx, > + LLVMValueRef num, > + LLVMValueRef den) > +{ > + LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, ""); > + > + if (!LLVMIsConstant(ret)) > + LLVMSetMetadata(ret, ctx->fpmath_md_kind, > ctx->fpmath_md_2p5_ulp); > + return ret; > +} > + > +/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 > + * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is > + * already multiplied by two. id is the cube face number. 
> + */ > +struct cube_selection_coords { > + LLVMValueRef stc[2]; > + LLVMValueRef ma; > + LLVMValueRef id; > +}; > + > +static void > +build_cube_intrinsic(struct ac_llvm_context *ctx, > + LLVMValueRef in[3], > + struct cube_selection_coords *out) > +{ > + LLVMBuilderRef builder = ctx->builder; > + > + if (HAVE_LLVM >= 0x0309) { > + LLVMTypeRef f32 = ctx->f32; > + > + out->stc[1] = ac_emit_llvm_intrinsic(ctx, > "llvm.amdgcn.cubetc", > + f32, in, 3, AC_FUNC_ATTR_READNONE); > + out->stc[0] = ac_emit_llvm_intrinsic(ctx, > "llvm.amdgcn.cubesc", > + f32, in, 3, AC_FUNC_ATTR_READNONE); > + out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema", > + f32, in, 3, AC_FUNC_ATTR_READNONE); > + out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid", > + f32, in, 3, AC_FUNC_ATTR_READNONE); > + } else { > + LLVMValueRef c[4] = { > + in[0], > + in[1], > + in[2], > + LLVMGetUndef(LLVMTypeOf(in[0])) > + }; > + LLVMValueRef vec = ac_build_gather_values(ctx, c, 4); > + > + LLVMValueRef tmp = > + ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", > + LLVMTypeOf(vec), &vec, 1, > + AC_FUNC_ATTR_READNONE); > + > + out->stc[1] = LLVMBuildExtractElement(builder, tmp, > + LLVMConstInt(ctx->i32, 0, 0), ""); > + out->stc[0] = LLVMBuildExtractElement(builder, tmp, > + LLVMConstInt(ctx->i32, 1, 0), ""); > + out->ma = LLVMBuildExtractElement(builder, tmp, > + LLVMConstInt(ctx->i32, 2, 0), ""); > + out->id = LLVMBuildExtractElement(builder, tmp, > + LLVMConstInt(ctx->i32, 3, 0), ""); > + } > +} > + > +/** > + * Build a manual selection sequence for cube face sc/tc coordinates and > + * major axis vector (multiplied by 2 for consistency) for the given > + * vec3 \p coords, for the face implied by \p selcoords. > + * > + * For the major axis, we always adjust the sign to be in the direction of > + * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards > + * the selcoords major axis. 
> + */ > +static void build_cube_select(LLVMBuilderRef builder, > + const struct cube_selection_coords *selcoords, > + const LLVMValueRef *coords, > + LLVMValueRef *out_st, > + LLVMValueRef *out_ma) > +{ > + LLVMTypeRef f32 = LLVMTypeOf(coords[0]); > + LLVMValueRef is_ma_positive; > + LLVMValueRef sgn_ma; > + LLVMValueRef is_ma_z, is_not_ma_z; > + LLVMValueRef is_ma_y; > + LLVMValueRef is_ma_x; > + LLVMValueRef sgn; > + LLVMValueRef tmp; > + > + is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, > + selcoords->ma, LLVMConstReal(f32, 0.0), ""); > + sgn_ma = LLVMBuildSelect(builder, is_ma_positive, > + LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); > + > + is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, > LLVMConstReal(f32, 4.0), ""); > + is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); > + is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, > + LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, > LLVMConstReal(f32, 2.0), ""), ""); > + is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, > is_ma_y, ""), ""); > + > + /* Select sc */ > + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], ""); > + sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), > + LLVMBuildSelect(builder, is_ma_x, sgn_ma, > + LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); > + out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); > + > + /* Select tc */ > + tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); > + sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, > sgn_ma, ""), > + LLVMConstReal(f32, -1.0), ""); > + out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); > + > + /* Select ma */ > + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], > + LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), > ""); > + sgn = LLVMBuildSelect(builder, is_ma_positive, > + LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), ""); > + *out_ma = LLVMBuildFMul(builder, tmp, sgn, ""); > +} > + > +void > +ac_prepare_cube_coords(struct 
ac_llvm_context *ctx, > + bool is_deriv, bool is_array, > + LLVMValueRef *coords_arg, > + LLVMValueRef *derivs_arg) > +{ > + > + LLVMBuilderRef builder = ctx->builder; > + struct cube_selection_coords selcoords; > + LLVMValueRef coords[3]; > + LLVMValueRef invma; > + > + build_cube_intrinsic(ctx, coords_arg, &selcoords); > + > + invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32", > + ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); > + invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); > + > + for (int i = 0; i < 2; ++i) > + coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, > ""); > + > + coords[2] = selcoords.id; > + > + if (is_deriv && derivs_arg) { > + LLVMValueRef derivs[4]; > + int axis; > + > + /* Convert cube derivatives to 2D derivatives. */ > + for (axis = 0; axis < 2; axis++) { > + LLVMValueRef deriv_st[2]; > + LLVMValueRef deriv_ma; > + > + /* Transform the derivative alongside the texture > + * coordinate. Mathematically, the correct formula is > + * as follows. Assume we're projecting onto the +Z > face > + * and denote by dx/dh the derivative of the > (original) > + * X texture coordinate with respect to horizontal > + * window coordinates. The projection onto the +Z face > + * plane is: > + * > + * f(x,z) = x/z > + * > + * Then df/dh = df/dx * dx/dh + df/dz * dz/dh > + * = 1/z * dx/dh - x/z * 1/z * dz/dh. > + * > + * This motivates the implementation below. > + * > + * Whether this actually gives the expected results > for > + * apps that might feed in derivatives obtained via > + * finite differences is anyone's guess. The OpenGL > spec > + * seems awfully quiet about how textureGrad for cube > + * maps should be handled. 
> + */ > + build_cube_select(builder, &selcoords, > &derivs_arg[axis * 3], > + deriv_st, &deriv_ma); > + > + deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, > ""); > + > + for (int i = 0; i < 2; ++i) > + derivs[axis * 2 + i] = > + LLVMBuildFSub(builder, > + LLVMBuildFMul(builder, > deriv_st[i], invma, ""), > + LLVMBuildFMul(builder, > deriv_ma, coords[i], ""), ""); > + } > + > + memcpy(derivs_arg, derivs, sizeof(derivs)); > + } > + > + /* Shift the texture coordinate. This must be applied after the > + * derivative calculation. > + */ > + for (int i = 0; i < 2; ++i) > + coords[i] = LLVMBuildFAdd(builder, coords[i], > LLVMConstReal(ctx->f32, 1.5), ""); > + > + if (is_array) { > + /* for cube arrays coord.z = coord.w(array_index) * 8 + face > */ > + /* coords_arg.w component - array_index for cube arrays */ > + LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], > LLVMConstReal(ctx->f32, 8.0), ""); > + coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); > + } > + > + memcpy(coords_arg, coords, sizeof(coords)); > +} > + > + > +LLVMValueRef > +ac_build_fs_interp(struct ac_llvm_context *ctx, > + LLVMValueRef llvm_chan, > + LLVMValueRef attr_number, > + LLVMValueRef params, > + LLVMValueRef i, > + LLVMValueRef j) > +{ > + LLVMValueRef args[5]; > + LLVMValueRef p1; > + > + if (HAVE_LLVM < 0x0400) { > + LLVMValueRef ij[2]; > + ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, ""); > + ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, ""); > + > + args[0] = llvm_chan; > + args[1] = attr_number; > + args[2] = params; > + args[3] = ac_build_gather_values(ctx, ij, 2); > + return ac_emit_llvm_intrinsic(ctx, "llvm.SI.fs.interp", > + ctx->f32, args, 4, > + AC_FUNC_ATTR_READNONE); > + } > + > + args[0] = i; > + args[1] = llvm_chan; > + args[2] = attr_number; > + args[3] = params; > + > + p1 = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p1", > + ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); > + > + args[0] = p1; > + args[1] = j; > + args[2] = llvm_chan; > 
+ args[3] = attr_number; > + args[4] = params; > + > + return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p2", > + ctx->f32, args, 5, > AC_FUNC_ATTR_READNONE); > +} > + > +LLVMValueRef > +ac_build_fs_interp_mov(struct ac_llvm_context *ctx, > + LLVMValueRef parameter, > + LLVMValueRef llvm_chan, > + LLVMValueRef attr_number, > + LLVMValueRef params) > +{ > + LLVMValueRef args[4]; > + if (HAVE_LLVM < 0x0400) { > + args[0] = llvm_chan; > + args[1] = attr_number; > + args[2] = params; > + > + return ac_emit_llvm_intrinsic(ctx, > + "llvm.SI.fs.constant", > + ctx->f32, args, 3, > + AC_FUNC_ATTR_READNONE); > + } > + > + args[0] = parameter; > + args[1] = llvm_chan; > + args[2] = attr_number; > + args[3] = params; > + > + return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.mov", > + ctx->f32, args, 4, > AC_FUNC_ATTR_READNONE); > +} > + > +LLVMValueRef > +ac_build_gep0(struct ac_llvm_context *ctx, > + LLVMValueRef base_ptr, > + LLVMValueRef index) > +{ > + LLVMValueRef indices[2] = { > + LLVMConstInt(ctx->i32, 0, 0), > + index, > + }; > + return LLVMBuildGEP(ctx->builder, base_ptr, > + indices, 2, ""); > +} > + > +void > +ac_build_indexed_store(struct ac_llvm_context *ctx, > + LLVMValueRef base_ptr, LLVMValueRef index, > + LLVMValueRef value) > +{ > + LLVMBuildStore(ctx->builder, value, > + ac_build_gep0(ctx, base_ptr, index)); > +} > + > +/** > + * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad. > + * It's equivalent to doing a load from &base_ptr[index]. > + * > + * \param base_ptr Where the array starts. > + * \param index The element index into the array. 
> + * \param uniform Whether the base_ptr and index can be assumed to be > + * dynamically uniform > + */ > +LLVMValueRef > +ac_build_indexed_load(struct ac_llvm_context *ctx, > + LLVMValueRef base_ptr, LLVMValueRef index, > + bool uniform) > +{ > + LLVMValueRef pointer; > + > + pointer = ac_build_gep0(ctx, base_ptr, index); > + if (uniform) > + LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); > + return LLVMBuildLoad(ctx->builder, pointer, ""); > +} > + > +/** > + * Do a load from &base_ptr[index], but also add a flag that it's loading > + * a constant from a dynamically uniform index. > + */ > +LLVMValueRef > +ac_build_indexed_load_const(struct ac_llvm_context *ctx, > + LLVMValueRef base_ptr, LLVMValueRef index) > +{ > + LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, > true); > + LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); > + return result; > +} > + > +/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by > num_channels=1..4. > + * The type of vdata must be one of i32 (num_channels=1), v2i32 > (num_channels=2), > + * or v4i32 (num_channels=3,4). 
> + */ > +void > +ac_build_tbuffer_store(struct ac_llvm_context *ctx, > + LLVMValueRef rsrc, > + LLVMValueRef vdata, > + unsigned num_channels, > + LLVMValueRef vaddr, > + LLVMValueRef soffset, > + unsigned inst_offset, > + unsigned dfmt, > + unsigned nfmt, > + unsigned offen, > + unsigned idxen, > + unsigned glc, > + unsigned slc, > + unsigned tfe) > +{ > + LLVMValueRef args[] = { > + rsrc, > + vdata, > + LLVMConstInt(ctx->i32, num_channels, 0), > + vaddr, > + soffset, > + LLVMConstInt(ctx->i32, inst_offset, 0), > + LLVMConstInt(ctx->i32, dfmt, 0), > + LLVMConstInt(ctx->i32, nfmt, 0), > + LLVMConstInt(ctx->i32, offen, 0), > + LLVMConstInt(ctx->i32, idxen, 0), > + LLVMConstInt(ctx->i32, glc, 0), > + LLVMConstInt(ctx->i32, slc, 0), > + LLVMConstInt(ctx->i32, tfe, 0) > + }; > + > + /* The instruction offset field has 12 bits */ > + assert(offen || inst_offset < (1 << 12)); > + > + /* The intrinsic is overloaded, we need to add a type suffix for > overloading to work. */ > + unsigned func = CLAMP(num_channels, 1, 3) - 1; > + const char *types[] = {"i32", "v2i32", "v4i32"}; > + char name[256]; > + snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); > + > + ac_emit_llvm_intrinsic(ctx, name, ctx->voidt, > + args, ARRAY_SIZE(args), 0); > +} > + > +void > +ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx, > + LLVMValueRef rsrc, > + LLVMValueRef vdata, > + unsigned num_channels, > + LLVMValueRef vaddr, > + LLVMValueRef soffset, > + unsigned inst_offset) > +{ > + static unsigned dfmt[] = { > + V_008F0C_BUF_DATA_FORMAT_32, > + V_008F0C_BUF_DATA_FORMAT_32_32, > + V_008F0C_BUF_DATA_FORMAT_32_32_32, > + V_008F0C_BUF_DATA_FORMAT_32_32_32_32 > + }; > + assert(num_channels >= 1 && num_channels <= 4); > + > + ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset, > + inst_offset, dfmt[num_channels - 1], > + V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0); > +} > + > +LLVMValueRef > +ac_build_buffer_load(struct ac_llvm_context *ctx, > + 
LLVMValueRef rsrc, > + int num_channels, > + LLVMValueRef vindex, > + LLVMValueRef voffset, > + LLVMValueRef soffset, > + unsigned inst_offset, > + unsigned glc, > + unsigned slc) > +{ > + unsigned func = CLAMP(num_channels, 1, 3) - 1; > + > + if (HAVE_LLVM >= 0x309) { > + LLVMValueRef args[] = { > + LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), > + vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0), > + LLVMConstInt(ctx->i32, inst_offset, 0), > + LLVMConstInt(ctx->i1, glc, 0), > + LLVMConstInt(ctx->i1, slc, 0) > + }; > + > + LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2), > + ctx->v4f32}; > + const char *type_names[] = {"f32", "v2f32", "v4f32"}; > + char name[256]; > + > + if (voffset) { > + args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset, > + ""); > + } > + > + if (soffset) { > + args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset, > + ""); > + } > + > + snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", > + type_names[func]); > + > + return ac_emit_llvm_intrinsic(ctx, name, types[func], args, > + ARRAY_SIZE(args), > AC_FUNC_ATTR_READONLY); > + } else { > + LLVMValueRef args[] = { > + LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""), > + voffset ? voffset : vindex, > + soffset, > + LLVMConstInt(ctx->i32, inst_offset, 0), > + LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen > + LLVMConstInt(ctx->i32, vindex ? 
1 : 0, 0), //idxen > + LLVMConstInt(ctx->i32, glc, 0), > + LLVMConstInt(ctx->i32, slc, 0), > + LLVMConstInt(ctx->i32, 0, 0), // TFE > + }; > + > + LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2), > + ctx->v4i32}; > + const char *type_names[] = {"i32", "v2i32", "v4i32"}; > + const char *arg_type = "i32"; > + char name[256]; > + > + if (voffset && vindex) { > + LLVMValueRef vaddr[] = {vindex, voffset}; > + > + arg_type = "v2i32"; > + args[1] = ac_build_gather_values(ctx, vaddr, 2); > + } > + > + snprintf(name, sizeof(name), > "llvm.SI.buffer.load.dword.%s.%s", > + type_names[func], arg_type); > + > + return ac_emit_llvm_intrinsic(ctx, name, types[func], args, > + ARRAY_SIZE(args), > AC_FUNC_ATTR_READONLY); > + } > +} > + > +/** > + * Set range metadata on an instruction. This can only be used on load and > + * call instructions. If you know an instruction can only produce the values > + * 0, 1, 2, you would do set_range_metadata(value, 0, 3); > + * \p lo is the minimum value inclusive. > + * \p hi is the maximum value exclusive. 
> + */ > +static void set_range_metadata(struct ac_llvm_context *ctx, > + LLVMValueRef value, unsigned lo, unsigned hi) > +{ > + LLVMValueRef range_md, md_args[2]; > + LLVMTypeRef type = LLVMTypeOf(value); > + LLVMContextRef context = LLVMGetTypeContext(type); > + > + md_args[0] = LLVMConstInt(type, lo, false); > + md_args[1] = LLVMConstInt(type, hi, false); > + range_md = LLVMMDNodeInContext(context, md_args, 2); > + LLVMSetMetadata(value, ctx->range_md_kind, range_md); > +} > + > +LLVMValueRef > +ac_get_thread_id(struct ac_llvm_context *ctx) > +{ > + LLVMValueRef tid; > + > + if (HAVE_LLVM < 0x0308) { > + tid = ac_emit_llvm_intrinsic(ctx, "llvm.SI.tid", > + ctx->i32, > + NULL, 0, AC_FUNC_ATTR_READNONE); > + } else { > + LLVMValueRef tid_args[2]; > + tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false); > + tid_args[1] = LLVMConstInt(ctx->i32, 0, false); > + tid_args[1] = ac_emit_llvm_intrinsic(ctx, > + "llvm.amdgcn.mbcnt.lo", > ctx->i32, > + tid_args, 2, > AC_FUNC_ATTR_READNONE); > + > + tid = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", > + ctx->i32, tid_args, > + 2, AC_FUNC_ATTR_READNONE); > + } > + set_range_metadata(ctx, tid, 0, 64); > + return tid; > +} > + > +/* > + * SI implements derivatives using the local data store (LDS) > + * All writes to the LDS happen in all executing threads at > + * the same time. TID is the Thread ID for the current > + * thread and is a value between 0 and 63, representing > + * the thread's position in the wavefront. > + * > + * For the pixel shader threads are grouped into quads of four pixels. 
> + * The TIDs of the pixels of a quad are: > + * > + * +------+------+ > + * |4n + 0|4n + 1| > + * +------+------+ > + * |4n + 2|4n + 3| > + * +------+------+ > + * > + * So, masking the TID with 0xfffffffc yields the TID of the top left pixel > + * of the quad, masking with 0xfffffffd yields the TID of the top pixel of > + * the current pixel's column, and masking with 0xfffffffe yields the TID > + * of the left pixel of the current pixel's row. > + * > + * Adding 1 yields the TID of the pixel to the right of the left pixel, and > + * adding 2 yields the TID of the pixel below the top pixel. > + */ > +LLVMValueRef > +ac_emit_ddxy(struct ac_llvm_context *ctx, > + bool has_ds_bpermute, > + uint32_t mask, > + int idx, > + LLVMValueRef lds, > + LLVMValueRef val) > +{ > + LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; > + LLVMValueRef result; > + > + thread_id = ac_get_thread_id(ctx); > + > + tl_tid = LLVMBuildAnd(ctx->builder, thread_id, > + LLVMConstInt(ctx->i32, mask, false), ""); > + > + trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, > + LLVMConstInt(ctx->i32, idx, false), ""); > + > + if (has_ds_bpermute) { > + args[0] = LLVMBuildMul(ctx->builder, tl_tid, > + LLVMConstInt(ctx->i32, 4, false), ""); > + args[1] = val; > + tl = ac_emit_llvm_intrinsic(ctx, > + "llvm.amdgcn.ds.bpermute", > ctx->i32, > + args, 2, AC_FUNC_ATTR_READNONE); > + > + args[0] = LLVMBuildMul(ctx->builder, trbl_tid, > + LLVMConstInt(ctx->i32, 4, false), ""); > + trbl = ac_emit_llvm_intrinsic(ctx, > + "llvm.amdgcn.ds.bpermute", > ctx->i32, > + args, 2, AC_FUNC_ATTR_READNONE); > + } else { > + LLVMValueRef store_ptr, load_ptr0, load_ptr1; > + > + store_ptr = ac_build_gep0(ctx, lds, thread_id); > + load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); > + load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); > + > + LLVMBuildStore(ctx->builder, val, store_ptr); > + tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); > + trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); > + } > + > + tl = 
LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); > + trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); > + result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); > + return result; > +} > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h > new file mode 100644 > index 0000000..37a5bea > --- /dev/null > +++ b/src/amd/common/ac_llvm_build.h > @@ -0,0 +1,177 @@ > +/* > + * Copyright 2016 Bas Nieuwenhuizen > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY > CLAIM, > + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE > + * USE OR OTHER DEALINGS IN THE SOFTWARE. > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portions > + * of the Software. 
> + * > + */ > +#ifndef AC_LLVM_BUILD_H > +#define AC_LLVM_BUILD_H > + > +#include <stdbool.h> > +#include <llvm-c/TargetMachine.h> > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +struct ac_llvm_context { > + LLVMContextRef context; > + LLVMModuleRef module; > + LLVMBuilderRef builder; > + > + LLVMTypeRef voidt; > + LLVMTypeRef i1; > + LLVMTypeRef i8; > + LLVMTypeRef i32; > + LLVMTypeRef f32; > + LLVMTypeRef v4i32; > + LLVMTypeRef v4f32; > + LLVMTypeRef v16i8; > + > + unsigned range_md_kind; > + unsigned invariant_load_md_kind; > + unsigned uniform_md_kind; > + unsigned fpmath_md_kind; > + LLVMValueRef fpmath_md_2p5_ulp; > + LLVMValueRef empty_md; > +}; > + > +void > +ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context); > + > +LLVMValueRef > +ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, > + LLVMTypeRef return_type, LLVMValueRef *params, > + unsigned param_count, unsigned attrib_mask); > + > +LLVMValueRef > +ac_build_gather_values_extended(struct ac_llvm_context *ctx, > + LLVMValueRef *values, > + unsigned value_count, > + unsigned value_stride, > + bool load); > +LLVMValueRef > +ac_build_gather_values(struct ac_llvm_context *ctx, > + LLVMValueRef *values, > + unsigned value_count); > + > +LLVMValueRef > +ac_emit_fdiv(struct ac_llvm_context *ctx, > + LLVMValueRef num, > + LLVMValueRef den); > + > +void > +ac_prepare_cube_coords(struct ac_llvm_context *ctx, > + bool is_deriv, bool is_array, > + LLVMValueRef *coords_arg, > + LLVMValueRef *derivs_arg); > + > + > +LLVMValueRef > +ac_build_fs_interp(struct ac_llvm_context *ctx, > + LLVMValueRef llvm_chan, > + LLVMValueRef attr_number, > + LLVMValueRef params, > + LLVMValueRef i, > + LLVMValueRef j); > + > +LLVMValueRef > +ac_build_fs_interp_mov(struct ac_llvm_context *ctx, > + LLVMValueRef parameter, > + LLVMValueRef llvm_chan, > + LLVMValueRef attr_number, > + LLVMValueRef params); > + > +LLVMValueRef > +ac_build_gep0(struct ac_llvm_context *ctx, > + LLVMValueRef 
base_ptr, > + LLVMValueRef index); > + > +void > +ac_build_indexed_store(struct ac_llvm_context *ctx, > + LLVMValueRef base_ptr, LLVMValueRef index, > + LLVMValueRef value); > + > +LLVMValueRef > +ac_build_indexed_load(struct ac_llvm_context *ctx, > + LLVMValueRef base_ptr, LLVMValueRef index, > + bool uniform); > + > +LLVMValueRef > +ac_build_indexed_load_const(struct ac_llvm_context *ctx, > + LLVMValueRef base_ptr, LLVMValueRef index); > + > +void > +ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx, > + LLVMValueRef rsrc, > + LLVMValueRef vdata, > + unsigned num_channels, > + LLVMValueRef vaddr, > + LLVMValueRef soffset, > + unsigned inst_offset); > + > +void > +ac_build_tbuffer_store(struct ac_llvm_context *ctx, > + LLVMValueRef rsrc, > + LLVMValueRef vdata, > + unsigned num_channels, > + LLVMValueRef vaddr, > + LLVMValueRef soffset, > + unsigned inst_offset, > + unsigned dfmt, > + unsigned nfmt, > + unsigned offen, > + unsigned idxen, > + unsigned glc, > + unsigned slc, > + unsigned tfe); > + > +LLVMValueRef > +ac_build_buffer_load(struct ac_llvm_context *ctx, > + LLVMValueRef rsrc, > + int num_channels, > + LLVMValueRef vindex, > + LLVMValueRef voffset, > + LLVMValueRef soffset, > + unsigned inst_offset, > + unsigned glc, > + unsigned slc); > + > +LLVMValueRef > +ac_get_thread_id(struct ac_llvm_context *ctx); > + > +#define AC_TID_MASK_TOP_LEFT 0xfffffffc > +#define AC_TID_MASK_TOP 0xfffffffd > +#define AC_TID_MASK_LEFT 0xfffffffe > + > +LLVMValueRef > +ac_emit_ddxy(struct ac_llvm_context *ctx, > + bool has_ds_bpermute, > + uint32_t mask, > + int idx, > + LLVMValueRef lds, > + LLVMValueRef val); > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif > diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c > index 156d874..be127c5 100644 > --- a/src/amd/common/ac_llvm_util.c > +++ b/src/amd/common/ac_llvm_util.c > @@ -31,11 +31,7 @@ > > #include <assert.h> > #include <stdio.h> > - > -#include "util/bitscan.h" > -#include 
"util/macros.h" > - > -#include "sid.h" > +#include <string.h> > > static void ac_init_llvm_target() > { > @@ -146,44 +142,6 @@ LLVMTargetMachineRef ac_create_target_machine(enum > radeon_family family, bool su > return tm; > } > > -/* Initialize module-independent parts of the context. > - * > - * The caller is responsible for initializing ctx::module and ctx::builder. > - */ > -void > -ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context) > -{ > - LLVMValueRef args[1]; > - > - ctx->context = context; > - ctx->module = NULL; > - ctx->builder = NULL; > - > - ctx->voidt = LLVMVoidTypeInContext(ctx->context); > - ctx->i1 = LLVMInt1TypeInContext(ctx->context); > - ctx->i8 = LLVMInt8TypeInContext(ctx->context); > - ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); > - ctx->f32 = LLVMFloatTypeInContext(ctx->context); > - ctx->v4i32 = LLVMVectorType(ctx->i32, 4); > - ctx->v4f32 = LLVMVectorType(ctx->f32, 4); > - ctx->v16i8 = LLVMVectorType(ctx->i8, 16); > - > - ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, > - "range", 5); > - > - ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, > - > "invariant.load", 14); > - > - ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, > "fpmath", 6); > - > - args[0] = LLVMConstReal(ctx->f32, 2.5); > - ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1); > - > - ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, > - "amdgpu.uniform", 14); > - > - ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); > -} > > #if HAVE_LLVM < 0x0400 > static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) > @@ -245,285 +203,6 @@ ac_add_function_attr(LLVMValueRef function, > #endif > } > > -LLVMValueRef > -ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, > - LLVMTypeRef return_type, LLVMValueRef *params, > - unsigned param_count, unsigned attrib_mask) > -{ > - LLVMValueRef function; > - > - function = 
LLVMGetNamedFunction(ctx->module, name); > - if (!function) { > - LLVMTypeRef param_types[32], function_type; > - unsigned i; > - > - assert(param_count <= 32); > - > - for (i = 0; i < param_count; ++i) { > - assert(params[i]); > - param_types[i] = LLVMTypeOf(params[i]); > - } > - function_type = > - LLVMFunctionType(return_type, param_types, param_count, > 0); > - function = LLVMAddFunction(ctx->module, name, function_type); > - > - LLVMSetFunctionCallConv(function, LLVMCCallConv); > - LLVMSetLinkage(function, LLVMExternalLinkage); > - > - attrib_mask |= AC_FUNC_ATTR_NOUNWIND; > - while (attrib_mask) { > - enum ac_func_attr attr = 1u << > u_bit_scan(&attrib_mask); > - ac_add_function_attr(function, -1, attr); > - } > - } > - return LLVMBuildCall(ctx->builder, function, params, param_count, ""); > -} > - > -LLVMValueRef > -ac_build_gather_values_extended(struct ac_llvm_context *ctx, > - LLVMValueRef *values, > - unsigned value_count, > - unsigned value_stride, > - bool load) > -{ > - LLVMBuilderRef builder = ctx->builder; > - LLVMValueRef vec; > - unsigned i; > - > - > - if (value_count == 1) { > - if (load) > - return LLVMBuildLoad(builder, values[0], ""); > - return values[0]; > - } else if (!value_count) > - unreachable("value_count is 0"); > - > - for (i = 0; i < value_count; i++) { > - LLVMValueRef value = values[i * value_stride]; > - if (load) > - value = LLVMBuildLoad(builder, value, ""); > - > - if (!i) > - vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), > value_count)); > - LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); > - vec = LLVMBuildInsertElement(builder, vec, value, index, ""); > - } > - return vec; > -} > - > -LLVMValueRef > -ac_build_gather_values(struct ac_llvm_context *ctx, > - LLVMValueRef *values, > - unsigned value_count) > -{ > - return ac_build_gather_values_extended(ctx, values, value_count, 1, > false); > -} > - > -LLVMValueRef > -ac_emit_fdiv(struct ac_llvm_context *ctx, > - LLVMValueRef num, > - LLVMValueRef den) > -{ > - 
LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, ""); > - > - if (!LLVMIsConstant(ret)) > - LLVMSetMetadata(ret, ctx->fpmath_md_kind, > ctx->fpmath_md_2p5_ulp); > - return ret; > -} > - > -/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 > - * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is > - * already multiplied by two. id is the cube face number. > - */ > -struct cube_selection_coords { > - LLVMValueRef stc[2]; > - LLVMValueRef ma; > - LLVMValueRef id; > -}; > - > -static void > -build_cube_intrinsic(struct ac_llvm_context *ctx, > - LLVMValueRef in[3], > - struct cube_selection_coords *out) > -{ > - LLVMBuilderRef builder = ctx->builder; > - > - if (HAVE_LLVM >= 0x0309) { > - LLVMTypeRef f32 = ctx->f32; > - > - out->stc[1] = ac_emit_llvm_intrinsic(ctx, > "llvm.amdgcn.cubetc", > - f32, in, 3, AC_FUNC_ATTR_READNONE); > - out->stc[0] = ac_emit_llvm_intrinsic(ctx, > "llvm.amdgcn.cubesc", > - f32, in, 3, AC_FUNC_ATTR_READNONE); > - out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema", > - f32, in, 3, AC_FUNC_ATTR_READNONE); > - out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid", > - f32, in, 3, AC_FUNC_ATTR_READNONE); > - } else { > - LLVMValueRef c[4] = { > - in[0], > - in[1], > - in[2], > - LLVMGetUndef(LLVMTypeOf(in[0])) > - }; > - LLVMValueRef vec = ac_build_gather_values(ctx, c, 4); > - > - LLVMValueRef tmp = > - ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", > - LLVMTypeOf(vec), &vec, 1, > - AC_FUNC_ATTR_READNONE); > - > - out->stc[1] = LLVMBuildExtractElement(builder, tmp, > - LLVMConstInt(ctx->i32, 0, 0), ""); > - out->stc[0] = LLVMBuildExtractElement(builder, tmp, > - LLVMConstInt(ctx->i32, 1, 0), ""); > - out->ma = LLVMBuildExtractElement(builder, tmp, > - LLVMConstInt(ctx->i32, 2, 0), ""); > - out->id = LLVMBuildExtractElement(builder, tmp, > - LLVMConstInt(ctx->i32, 3, 0), ""); > - } > -} > - > -/** > - * Build a manual selection sequence for cube face sc/tc coordinates and > - * 
major axis vector (multiplied by 2 for consistency) for the given > - * vec3 \p coords, for the face implied by \p selcoords. > - * > - * For the major axis, we always adjust the sign to be in the direction of > - * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards > - * the selcoords major axis. > - */ > -static void build_cube_select(LLVMBuilderRef builder, > - const struct cube_selection_coords *selcoords, > - const LLVMValueRef *coords, > - LLVMValueRef *out_st, > - LLVMValueRef *out_ma) > -{ > - LLVMTypeRef f32 = LLVMTypeOf(coords[0]); > - LLVMValueRef is_ma_positive; > - LLVMValueRef sgn_ma; > - LLVMValueRef is_ma_z, is_not_ma_z; > - LLVMValueRef is_ma_y; > - LLVMValueRef is_ma_x; > - LLVMValueRef sgn; > - LLVMValueRef tmp; > - > - is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, > - selcoords->ma, LLVMConstReal(f32, 0.0), ""); > - sgn_ma = LLVMBuildSelect(builder, is_ma_positive, > - LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); > - > - is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, > LLVMConstReal(f32, 4.0), ""); > - is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); > - is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, > - LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, > LLVMConstReal(f32, 2.0), ""), ""); > - is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, > is_ma_y, ""), ""); > - > - /* Select sc */ > - tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], ""); > - sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), > - LLVMBuildSelect(builder, is_ma_x, sgn_ma, > - LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); > - out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); > - > - /* Select tc */ > - tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); > - sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, > sgn_ma, ""), > - LLVMConstReal(f32, -1.0), ""); > - out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); > - > - /* Select ma */ > - tmp = 
LLVMBuildSelect(builder, is_ma_z, coords[2], > - LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), > ""); > - sgn = LLVMBuildSelect(builder, is_ma_positive, > - LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), ""); > - *out_ma = LLVMBuildFMul(builder, tmp, sgn, ""); > -} > - > -void > -ac_prepare_cube_coords(struct ac_llvm_context *ctx, > - bool is_deriv, bool is_array, > - LLVMValueRef *coords_arg, > - LLVMValueRef *derivs_arg) > -{ > - > - LLVMBuilderRef builder = ctx->builder; > - struct cube_selection_coords selcoords; > - LLVMValueRef coords[3]; > - LLVMValueRef invma; > - > - build_cube_intrinsic(ctx, coords_arg, &selcoords); > - > - invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32", > - ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); > - invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); > - > - for (int i = 0; i < 2; ++i) > - coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, > ""); > - > - coords[2] = selcoords.id; > - > - if (is_deriv && derivs_arg) { > - LLVMValueRef derivs[4]; > - int axis; > - > - /* Convert cube derivatives to 2D derivatives. */ > - for (axis = 0; axis < 2; axis++) { > - LLVMValueRef deriv_st[2]; > - LLVMValueRef deriv_ma; > - > - /* Transform the derivative alongside the texture > - * coordinate. Mathematically, the correct formula is > - * as follows. Assume we're projecting onto the +Z > face > - * and denote by dx/dh the derivative of the > (original) > - * X texture coordinate with respect to horizontal > - * window coordinates. The projection onto the +Z face > - * plane is: > - * > - * f(x,z) = x/z > - * > - * Then df/dh = df/dx * dx/dh + df/dz * dz/dh > - * = 1/z * dx/dh - x/z * 1/z * dz/dh. > - * > - * This motivatives the implementation below. > - * > - * Whether this actually gives the expected results > for > - * apps that might feed in derivatives obtained via > - * finite differences is anyone's guess. 
The OpenGL > spec > - * seems awfully quiet about how textureGrad for cube > - * maps should be handled. > - */ > - build_cube_select(builder, &selcoords, > &derivs_arg[axis * 3], > - deriv_st, &deriv_ma); > - > - deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, > ""); > - > - for (int i = 0; i < 2; ++i) > - derivs[axis * 2 + i] = > - LLVMBuildFSub(builder, > - LLVMBuildFMul(builder, > deriv_st[i], invma, ""), > - LLVMBuildFMul(builder, > deriv_ma, coords[i], ""), ""); > - } > - > - memcpy(derivs_arg, derivs, sizeof(derivs)); > - } > - > - /* Shift the texture coordinate. This must be applied after the > - * derivative calculation. > - */ > - for (int i = 0; i < 2; ++i) > - coords[i] = LLVMBuildFAdd(builder, coords[i], > LLVMConstReal(ctx->f32, 1.5), ""); > - > - if (is_array) { > - /* for cube arrays coord.z = coord.w(array_index) * 8 + face > */ > - /* coords_arg.w component - array_index for cube arrays */ > - LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], > LLVMConstReal(ctx->f32, 8.0), ""); > - coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); > - } > - > - memcpy(coords_arg, coords, sizeof(coords)); > -} > - > void > ac_dump_module(LLVMModuleRef module) > { > @@ -531,397 +210,3 @@ ac_dump_module(LLVMModuleRef module) > fprintf(stderr, "%s", str); > LLVMDisposeMessage(str); > } > - > -LLVMValueRef > -ac_build_fs_interp(struct ac_llvm_context *ctx, > - LLVMValueRef llvm_chan, > - LLVMValueRef attr_number, > - LLVMValueRef params, > - LLVMValueRef i, > - LLVMValueRef j) > -{ > - LLVMValueRef args[5]; > - LLVMValueRef p1; > - > - if (HAVE_LLVM < 0x0400) { > - LLVMValueRef ij[2]; > - ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, ""); > - ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, ""); > - > - args[0] = llvm_chan; > - args[1] = attr_number; > - args[2] = params; > - args[3] = ac_build_gather_values(ctx, ij, 2); > - return ac_emit_llvm_intrinsic(ctx, "llvm.SI.fs.interp", > - ctx->f32, args, 4, > - AC_FUNC_ATTR_READNONE); > 
- } > - > - args[0] = i; > - args[1] = llvm_chan; > - args[2] = attr_number; > - args[3] = params; > - > - p1 = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p1", > - ctx->f32, args, 4, AC_FUNC_ATTR_READNONE); > - > - args[0] = p1; > - args[1] = j; > - args[2] = llvm_chan; > - args[3] = attr_number; > - args[4] = params; > - > - return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p2", > - ctx->f32, args, 5, > AC_FUNC_ATTR_READNONE); > -} > - > -LLVMValueRef > -ac_build_fs_interp_mov(struct ac_llvm_context *ctx, > - LLVMValueRef parameter, > - LLVMValueRef llvm_chan, > - LLVMValueRef attr_number, > - LLVMValueRef params) > -{ > - LLVMValueRef args[4]; > - if (HAVE_LLVM < 0x0400) { > - args[0] = llvm_chan; > - args[1] = attr_number; > - args[2] = params; > - > - return ac_emit_llvm_intrinsic(ctx, > - "llvm.SI.fs.constant", > - ctx->f32, args, 3, > - AC_FUNC_ATTR_READNONE); > - } > - > - args[0] = parameter; > - args[1] = llvm_chan; > - args[2] = attr_number; > - args[3] = params; > - > - return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.mov", > - ctx->f32, args, 4, > AC_FUNC_ATTR_READNONE); > -} > - > -LLVMValueRef > -ac_build_gep0(struct ac_llvm_context *ctx, > - LLVMValueRef base_ptr, > - LLVMValueRef index) > -{ > - LLVMValueRef indices[2] = { > - LLVMConstInt(ctx->i32, 0, 0), > - index, > - }; > - return LLVMBuildGEP(ctx->builder, base_ptr, > - indices, 2, ""); > -} > - > -void > -ac_build_indexed_store(struct ac_llvm_context *ctx, > - LLVMValueRef base_ptr, LLVMValueRef index, > - LLVMValueRef value) > -{ > - LLVMBuildStore(ctx->builder, value, > - ac_build_gep0(ctx, base_ptr, index)); > -} > - > -/** > - * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad. > - * It's equivalent to doing a load from &base_ptr[index]. > - * > - * \param base_ptr Where the array starts. > - * \param index The element index into the array. 
> - * \param uniform Whether the base_ptr and index can be assumed to be > - * dynamically uniform > - */ > -LLVMValueRef > -ac_build_indexed_load(struct ac_llvm_context *ctx, > - LLVMValueRef base_ptr, LLVMValueRef index, > - bool uniform) > -{ > - LLVMValueRef pointer; > - > - pointer = ac_build_gep0(ctx, base_ptr, index); > - if (uniform) > - LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); > - return LLVMBuildLoad(ctx->builder, pointer, ""); > -} > - > -/** > - * Do a load from &base_ptr[index], but also add a flag that it's loading > - * a constant from a dynamically uniform index. > - */ > -LLVMValueRef > -ac_build_indexed_load_const(struct ac_llvm_context *ctx, > - LLVMValueRef base_ptr, LLVMValueRef index) > -{ > - LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, > true); > - LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); > - return result; > -} > - > -/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by > num_channels=1..4. > - * The type of vdata must be one of i32 (num_channels=1), v2i32 > (num_channels=2), > - * or v4i32 (num_channels=3,4). 
> - */ > -void > -ac_build_tbuffer_store(struct ac_llvm_context *ctx, > - LLVMValueRef rsrc, > - LLVMValueRef vdata, > - unsigned num_channels, > - LLVMValueRef vaddr, > - LLVMValueRef soffset, > - unsigned inst_offset, > - unsigned dfmt, > - unsigned nfmt, > - unsigned offen, > - unsigned idxen, > - unsigned glc, > - unsigned slc, > - unsigned tfe) > -{ > - LLVMValueRef args[] = { > - rsrc, > - vdata, > - LLVMConstInt(ctx->i32, num_channels, 0), > - vaddr, > - soffset, > - LLVMConstInt(ctx->i32, inst_offset, 0), > - LLVMConstInt(ctx->i32, dfmt, 0), > - LLVMConstInt(ctx->i32, nfmt, 0), > - LLVMConstInt(ctx->i32, offen, 0), > - LLVMConstInt(ctx->i32, idxen, 0), > - LLVMConstInt(ctx->i32, glc, 0), > - LLVMConstInt(ctx->i32, slc, 0), > - LLVMConstInt(ctx->i32, tfe, 0) > - }; > - > - /* The instruction offset field has 12 bits */ > - assert(offen || inst_offset < (1 << 12)); > - > - /* The intrinsic is overloaded, we need to add a type suffix for > overloading to work. */ > - unsigned func = CLAMP(num_channels, 1, 3) - 1; > - const char *types[] = {"i32", "v2i32", "v4i32"}; > - char name[256]; > - snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); > - > - ac_emit_llvm_intrinsic(ctx, name, ctx->voidt, > - args, ARRAY_SIZE(args), 0); > -} > - > -void > -ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx, > - LLVMValueRef rsrc, > - LLVMValueRef vdata, > - unsigned num_channels, > - LLVMValueRef vaddr, > - LLVMValueRef soffset, > - unsigned inst_offset) > -{ > - static unsigned dfmt[] = { > - V_008F0C_BUF_DATA_FORMAT_32, > - V_008F0C_BUF_DATA_FORMAT_32_32, > - V_008F0C_BUF_DATA_FORMAT_32_32_32, > - V_008F0C_BUF_DATA_FORMAT_32_32_32_32 > - }; > - assert(num_channels >= 1 && num_channels <= 4); > - > - ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset, > - inst_offset, dfmt[num_channels - 1], > - V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0); > -} > - > -LLVMValueRef > -ac_build_buffer_load(struct ac_llvm_context *ctx, > - 
LLVMValueRef rsrc, > - int num_channels, > - LLVMValueRef vindex, > - LLVMValueRef voffset, > - LLVMValueRef soffset, > - unsigned inst_offset, > - unsigned glc, > - unsigned slc) > -{ > - unsigned func = CLAMP(num_channels, 1, 3) - 1; > - > - if (HAVE_LLVM >= 0x309) { > - LLVMValueRef args[] = { > - LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), > - vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0), > - LLVMConstInt(ctx->i32, inst_offset, 0), > - LLVMConstInt(ctx->i1, glc, 0), > - LLVMConstInt(ctx->i1, slc, 0) > - }; > - > - LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2), > - ctx->v4f32}; > - const char *type_names[] = {"f32", "v2f32", "v4f32"}; > - char name[256]; > - > - if (voffset) { > - args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset, > - ""); > - } > - > - if (soffset) { > - args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset, > - ""); > - } > - > - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", > - type_names[func]); > - > - return ac_emit_llvm_intrinsic(ctx, name, types[func], args, > - ARRAY_SIZE(args), > AC_FUNC_ATTR_READONLY); > - } else { > - LLVMValueRef args[] = { > - LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""), > - voffset ? voffset : vindex, > - soffset, > - LLVMConstInt(ctx->i32, inst_offset, 0), > - LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen > - LLVMConstInt(ctx->i32, vindex ? 
1 : 0, 0), //idxen > - LLVMConstInt(ctx->i32, glc, 0), > - LLVMConstInt(ctx->i32, slc, 0), > - LLVMConstInt(ctx->i32, 0, 0), // TFE > - }; > - > - LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2), > - ctx->v4i32}; > - const char *type_names[] = {"i32", "v2i32", "v4i32"}; > - const char *arg_type = "i32"; > - char name[256]; > - > - if (voffset && vindex) { > - LLVMValueRef vaddr[] = {vindex, voffset}; > - > - arg_type = "v2i32"; > - args[1] = ac_build_gather_values(ctx, vaddr, 2); > - } > - > - snprintf(name, sizeof(name), > "llvm.SI.buffer.load.dword.%s.%s", > - type_names[func], arg_type); > - > - return ac_emit_llvm_intrinsic(ctx, name, types[func], args, > - ARRAY_SIZE(args), > AC_FUNC_ATTR_READONLY); > - } > -} > - > -/** > - * Set range metadata on an instruction. This can only be used on load and > - * call instructions. If you know an instruction can only produce the values > - * 0, 1, 2, you would do set_range_metadata(value, 0, 3); > - * \p lo is the minimum value inclusive. > - * \p hi is the maximum value exclusive. 
> - */ > -static void set_range_metadata(struct ac_llvm_context *ctx, > - LLVMValueRef value, unsigned lo, unsigned hi) > -{ > - LLVMValueRef range_md, md_args[2]; > - LLVMTypeRef type = LLVMTypeOf(value); > - LLVMContextRef context = LLVMGetTypeContext(type); > - > - md_args[0] = LLVMConstInt(type, lo, false); > - md_args[1] = LLVMConstInt(type, hi, false); > - range_md = LLVMMDNodeInContext(context, md_args, 2); > - LLVMSetMetadata(value, ctx->range_md_kind, range_md); > -} > - > -LLVMValueRef > -ac_get_thread_id(struct ac_llvm_context *ctx) > -{ > - LLVMValueRef tid; > - > - if (HAVE_LLVM < 0x0308) { > - tid = ac_emit_llvm_intrinsic(ctx, "llvm.SI.tid", > - ctx->i32, > - NULL, 0, AC_FUNC_ATTR_READNONE); > - } else { > - LLVMValueRef tid_args[2]; > - tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false); > - tid_args[1] = LLVMConstInt(ctx->i32, 0, false); > - tid_args[1] = ac_emit_llvm_intrinsic(ctx, > - "llvm.amdgcn.mbcnt.lo", > ctx->i32, > - tid_args, 2, > AC_FUNC_ATTR_READNONE); > - > - tid = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", > - ctx->i32, tid_args, > - 2, AC_FUNC_ATTR_READNONE); > - } > - set_range_metadata(ctx, tid, 0, 64); > - return tid; > -} > - > -/* > - * SI implements derivatives using the local data store (LDS) > - * All writes to the LDS happen in all executing threads at > - * the same time. TID is the Thread ID for the current > - * thread and is a value between 0 and 63, representing > - * the thread's position in the wavefront. > - * > - * For the pixel shader threads are grouped into quads of four pixels. 
> - * The TIDs of the pixels of a quad are: > - * > - * +------+------+ > - * |4n + 0|4n + 1| > - * +------+------+ > - * |4n + 2|4n + 3| > - * +------+------+ > - * > - * So, masking the TID with 0xfffffffc yields the TID of the top left pixel > - * of the quad, masking with 0xfffffffd yields the TID of the top pixel of > - * the current pixel's column, and masking with 0xfffffffe yields the TID > - * of the left pixel of the current pixel's row. > - * > - * Adding 1 yields the TID of the pixel to the right of the left pixel, and > - * adding 2 yields the TID of the pixel below the top pixel. > - */ > -LLVMValueRef > -ac_emit_ddxy(struct ac_llvm_context *ctx, > - bool has_ds_bpermute, > - uint32_t mask, > - int idx, > - LLVMValueRef lds, > - LLVMValueRef val) > -{ > - LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; > - LLVMValueRef result; > - > - thread_id = ac_get_thread_id(ctx); > - > - tl_tid = LLVMBuildAnd(ctx->builder, thread_id, > - LLVMConstInt(ctx->i32, mask, false), ""); > - > - trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, > - LLVMConstInt(ctx->i32, idx, false), ""); > - > - if (has_ds_bpermute) { > - args[0] = LLVMBuildMul(ctx->builder, tl_tid, > - LLVMConstInt(ctx->i32, 4, false), ""); > - args[1] = val; > - tl = ac_emit_llvm_intrinsic(ctx, > - "llvm.amdgcn.ds.bpermute", > ctx->i32, > - args, 2, AC_FUNC_ATTR_READNONE); > - > - args[0] = LLVMBuildMul(ctx->builder, trbl_tid, > - LLVMConstInt(ctx->i32, 4, false), ""); > - trbl = ac_emit_llvm_intrinsic(ctx, > - "llvm.amdgcn.ds.bpermute", > ctx->i32, > - args, 2, AC_FUNC_ATTR_READNONE); > - } else { > - LLVMValueRef store_ptr, load_ptr0, load_ptr1; > - > - store_ptr = ac_build_gep0(ctx, lds, thread_id); > - load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); > - load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); > - > - LLVMBuildStore(ctx->builder, val, store_ptr); > - tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); > - trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); > - } > - > - tl = 
LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); > - trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); > - result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); > - return result; > -} > diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h > index a457714..1f37a12 100644 > --- a/src/amd/common/ac_llvm_util.h > +++ b/src/amd/common/ac_llvm_util.h > @@ -44,154 +44,19 @@ enum ac_func_attr { > AC_FUNC_ATTR_LAST = (1 << 7) > }; > > -struct ac_llvm_context { > - LLVMContextRef context; > - LLVMModuleRef module; > - LLVMBuilderRef builder; > - > - LLVMTypeRef voidt; > - LLVMTypeRef i1; > - LLVMTypeRef i8; > - LLVMTypeRef i32; > - LLVMTypeRef f32; > - LLVMTypeRef v4i32; > - LLVMTypeRef v4f32; > - LLVMTypeRef v16i8; > - > - unsigned range_md_kind; > - unsigned invariant_load_md_kind; > - unsigned uniform_md_kind; > - unsigned fpmath_md_kind; > - LLVMValueRef fpmath_md_2p5_ulp; > - LLVMValueRef empty_md; > -}; > - > LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, > bool supports_spill); > > void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); > bool ac_is_sgpr_param(LLVMValueRef param); > > void > -ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context); > - > -void > ac_add_function_attr(LLVMValueRef function, > int attr_idx, > enum ac_func_attr attr); > -LLVMValueRef > -ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, > - LLVMTypeRef return_type, LLVMValueRef *params, > - unsigned param_count, unsigned attrib_mask); > - > -LLVMValueRef > -ac_build_gather_values_extended(struct ac_llvm_context *ctx, > - LLVMValueRef *values, > - unsigned value_count, > - unsigned value_stride, > - bool load); > -LLVMValueRef > -ac_build_gather_values(struct ac_llvm_context *ctx, > - LLVMValueRef *values, > - unsigned value_count); > - > -LLVMValueRef > -ac_emit_fdiv(struct ac_llvm_context *ctx, > - LLVMValueRef num, > - LLVMValueRef den); > - > -void > -ac_prepare_cube_coords(struct 
ac_llvm_context *ctx, > - bool is_deriv, bool is_array, > - LLVMValueRef *coords_arg, > - LLVMValueRef *derivs_arg); > > void > ac_dump_module(LLVMModuleRef module); > > -LLVMValueRef > -ac_build_fs_interp(struct ac_llvm_context *ctx, > - LLVMValueRef llvm_chan, > - LLVMValueRef attr_number, > - LLVMValueRef params, > - LLVMValueRef i, > - LLVMValueRef j); > - > -LLVMValueRef > -ac_build_fs_interp_mov(struct ac_llvm_context *ctx, > - LLVMValueRef parameter, > - LLVMValueRef llvm_chan, > - LLVMValueRef attr_number, > - LLVMValueRef params); > - > -LLVMValueRef > -ac_build_gep0(struct ac_llvm_context *ctx, > - LLVMValueRef base_ptr, > - LLVMValueRef index); > - > -void > -ac_build_indexed_store(struct ac_llvm_context *ctx, > - LLVMValueRef base_ptr, LLVMValueRef index, > - LLVMValueRef value); > - > -LLVMValueRef > -ac_build_indexed_load(struct ac_llvm_context *ctx, > - LLVMValueRef base_ptr, LLVMValueRef index, > - bool uniform); > - > -LLVMValueRef > -ac_build_indexed_load_const(struct ac_llvm_context *ctx, > - LLVMValueRef base_ptr, LLVMValueRef index); > - > -void > -ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx, > - LLVMValueRef rsrc, > - LLVMValueRef vdata, > - unsigned num_channels, > - LLVMValueRef vaddr, > - LLVMValueRef soffset, > - unsigned inst_offset); > - > -void > -ac_build_tbuffer_store(struct ac_llvm_context *ctx, > - LLVMValueRef rsrc, > - LLVMValueRef vdata, > - unsigned num_channels, > - LLVMValueRef vaddr, > - LLVMValueRef soffset, > - unsigned inst_offset, > - unsigned dfmt, > - unsigned nfmt, > - unsigned offen, > - unsigned idxen, > - unsigned glc, > - unsigned slc, > - unsigned tfe); > - > -LLVMValueRef > -ac_build_buffer_load(struct ac_llvm_context *ctx, > - LLVMValueRef rsrc, > - int num_channels, > - LLVMValueRef vindex, > - LLVMValueRef voffset, > - LLVMValueRef soffset, > - unsigned inst_offset, > - unsigned glc, > - unsigned slc); > - > -LLVMValueRef > -ac_get_thread_id(struct ac_llvm_context *ctx); > - > -#define 
AC_TID_MASK_TOP_LEFT 0xfffffffc > -#define AC_TID_MASK_TOP 0xfffffffd > -#define AC_TID_MASK_LEFT 0xfffffffe > - > -LLVMValueRef > -ac_emit_ddxy(struct ac_llvm_context *ctx, > - bool has_ds_bpermute, > - uint32_t mask, > - int idx, > - LLVMValueRef lds, > - LLVMValueRef val); > #ifdef __cplusplus > } > #endif > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index e8dc752..cd1946b 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -22,6 +22,7 @@ > */ > > #include "ac_nir_to_llvm.h" > +#include "ac_llvm_build.h" > #include "ac_llvm_util.h" > #include "ac_binary.h" > #include "sid.h" > diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h > b/src/gallium/drivers/radeonsi/si_shader_internal.h > index 05b9868..26cc28d 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_internal.h > +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h > @@ -29,6 +29,7 @@ > #include "gallivm/lp_bld_tgsi.h" > #include "tgsi/tgsi_parse.h" > #include "ac_llvm_util.h" > +#include "ac_llvm_build.h" > > #include <llvm-c/Core.h> > #include <llvm-c/TargetMachine.h> > -- > 2.9.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev