Module: Mesa Branch: main Commit: 9b2ec290c4724f8aaf55a94761f527c3c710137a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b2ec290c4724f8aaf55a94761f527c3c710137a
Author: Qiang Yu <[email protected]> Date: Sun Jul 3 17:32:33 2022 +0800 ac/llvm: remove unused llvm cull Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]> Reviewed-by: Marek Olšák <[email protected]> Signed-off-by: Qiang Yu <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17109> --- src/amd/llvm/ac_llvm_cull.c | 359 -------------------------------------------- src/amd/llvm/ac_llvm_cull.h | 63 -------- src/amd/llvm/meson.build | 2 - 3 files changed, 424 deletions(-) diff --git a/src/amd/llvm/ac_llvm_cull.c b/src/amd/llvm/ac_llvm_cull.c deleted file mode 100644 index d37a9f847f6..00000000000 --- a/src/amd/llvm/ac_llvm_cull.c +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - */ - -#include "ac_llvm_cull.h" - -#include <llvm-c/Core.h> - -struct ac_position_w_info { - /* If a primitive intersects the W=0 plane, it causes a reflection - * of the determinant used for face culling. Every vertex behind - * the W=0 plane negates the determinant, so having 2 vertices behind - * the plane has no effect. This is i1 true if the determinant should be - * negated. - */ - LLVMValueRef w_reflection; - - /* If we simplify the "-w <= p <= w" view culling equation, we get - * "-w <= w", which can't be satisfied when w is negative. - * In perspective projection, a negative W means that the primitive - * is behind the viewer, but the equation is independent of the type - * of projection. - * - * w_accepted is false when all W are negative and therefore - * the primitive is invisible. - */ - LLVMValueRef w_accepted; - - /* The bounding box culling doesn't work and should be skipped when this is true. */ - LLVMValueRef any_w_negative; -}; - -static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], - struct ac_position_w_info *w, unsigned num_vertices) -{ - LLVMBuilderRef builder = ctx->builder; - LLVMValueRef all_w_negative = ctx->i1true; - - w->w_reflection = ctx->i1false; - w->any_w_negative = ctx->i1false; - - for (unsigned i = 0; i < num_vertices; i++) { - LLVMValueRef neg_w; - - neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, ""); - /* If neg_w is true, negate w_reflection. */ - w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, ""); - w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, ""); - all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, ""); - } - w->w_accepted = LLVMBuildNot(builder, all_w_negative, ""); -} - -/* Perform front/back face culling and return true if the primitive is accepted. */ -static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], - struct ac_position_w_info *w, bool cull_front, bool cull_back, - bool cull_zero_area) -{ - LLVMBuilderRef builder = ctx->builder; - - if (cull_front && cull_back) - return ctx->i1false; - - if (!cull_front && !cull_back && !cull_zero_area) - return ctx->i1true; - - /* Front/back face culling. Also if the determinant == 0, the triangle - * area is 0. - */ - LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], ""); - LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], ""); - LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], ""); - LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], ""); - /* t0 * t1 - t2 * t3 = t2 * -t3 + t0 * t1 = fma(t2, -t3, t0 * t1) */ - LLVMValueRef det = ac_build_fmad(ctx, det_t2, LLVMBuildFNeg(builder, det_t3, ""), - LLVMBuildFMul(builder, det_t0, det_t1, "")); - - /* Negative W negates the determinant. */ - det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, ""); - - LLVMValueRef accepted = NULL; - if (cull_front) { - LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE; - accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); - } else if (cull_back) { - LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE; - accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, ""); - } else if (cull_zero_area) { - accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, ""); - } - - if (accepted) { - /* Don't reject NaN and +/-infinity, these are tricky. - * Just trust fixed-function HW to handle these cases correctly. - */ - accepted = LLVMBuildOr(builder, accepted, ac_build_is_inf_or_nan(ctx, det), ""); - } - - return accepted; -} - -static void rotate_45degrees(struct ac_llvm_context *ctx, LLVMValueRef v[2]) -{ - /* sin(45) == cos(45) */ - LLVMValueRef sincos45 = LLVMConstReal(ctx->f32, 0.707106781); - - /* x2 = x*cos45 - y*sin45 = x*sincos45 - y*sincos45 - * y2 = x*sin45 + y*cos45 = x*sincos45 + y*sincos45 - */ - LLVMValueRef first = LLVMBuildFMul(ctx->builder, v[0], sincos45, ""); - - /* Doing 2x ffma while duplicating the multiplication is 33% faster than fmul+fadd+fadd. */ - LLVMValueRef result[2] = { - ac_build_fmad(ctx, LLVMBuildFNeg(ctx->builder, v[1], ""), sincos45, first), - ac_build_fmad(ctx, v[1], sincos45, first), - }; - - memcpy(v, result, sizeof(result)); -} - -/* Perform view culling and small primitive elimination and return true - * if the primitive is accepted and initially_accepted == true. */ -static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], - LLVMValueRef initially_accepted, struct ac_position_w_info *w, - LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2], - LLVMValueRef small_prim_precision, - LLVMValueRef clip_half_line_width[2], - struct ac_cull_options *options, - ac_cull_accept_func accept_func, void *userdata) -{ - LLVMBuilderRef builder = ctx->builder; - - if (!options->cull_view_xy && !options->cull_view_near_z && !options->cull_view_far_z && - !options->cull_small_prims) { - if (accept_func) - accept_func(ctx, initially_accepted, userdata); - return; - } - - ac_build_ifcc(ctx, initially_accepted, 10000000); - { - LLVMValueRef bbox_min[3], bbox_max[3]; - LLVMValueRef accepted = ctx->i1true; - - /* Compute the primitive bounding box for easy culling. */ - for (unsigned chan = 0; chan < (options->cull_view_near_z || - options->cull_view_far_z ? 3 : 2); chan++) { - assert(options->num_vertices >= 2); - bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]); - bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]); - - if (options->num_vertices == 3) { - bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]); - bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]); - } - - if (clip_half_line_width[chan]) { - bbox_min[chan] = LLVMBuildFSub(builder, bbox_min[chan], clip_half_line_width[chan], ""); - bbox_max[chan] = LLVMBuildFAdd(builder, bbox_max[chan], clip_half_line_width[chan], ""); - } - } - - /* View culling. */ - if (options->cull_view_xy || options->cull_view_near_z || options->cull_view_far_z) { - for (unsigned chan = 0; chan < 3; chan++) { - LLVMValueRef visible; - - if ((options->cull_view_xy && chan <= 1) || (options->cull_view_near_z && chan == 2)) { - float t = chan == 2 && options->use_halfz_clip_space ? 0 : -1; - visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan], - LLVMConstReal(ctx->f32, t), ""); - accepted = LLVMBuildAnd(builder, accepted, visible, ""); - } - - if ((options->cull_view_xy && chan <= 1) || (options->cull_view_far_z && chan == 2)) { - visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, ""); - accepted = LLVMBuildAnd(builder, accepted, visible, ""); - } - } - } - - /* Small primitive culling - triangles. */ - if (options->cull_small_prims && options->num_vertices == 3) { - /* Assuming a sample position at (0.5, 0.5), if we round - * the bounding box min/max extents and the results of - * the rounding are equal in either the X or Y direction, - * the bounding box does not intersect the sample. - * - * See these GDC slides for pictures: - * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf - */ - LLVMValueRef min, max, not_equal[2], visible; - - for (unsigned chan = 0; chan < 2; chan++) { - /* Convert the position to screen-space coordinates. */ - min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]); - max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]); - /* Scale the bounding box according to the precision of - * the rasterizer and the number of MSAA samples. */ - min = LLVMBuildFSub(builder, min, small_prim_precision, ""); - max = LLVMBuildFAdd(builder, max, small_prim_precision, ""); - - /* Determine if the bbox intersects the sample point. - * It also works for MSAA, but vp_scale, vp_translate, - * and small_prim_precision are computed differently. - */ - min = ac_build_round(ctx, min); - max = ac_build_round(ctx, max); - not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, ""); - } - visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], ""); - accepted = LLVMBuildAnd(builder, accepted, visible, ""); - } - - /* Small primitive culling - lines. */ - if (options->cull_small_prims && options->num_vertices == 2) { - /* This only works with lines without perpendicular end caps (lines with perpendicular - * end caps are rasterized as quads and thus can't be culled as small prims in 99% of - * cases because line_width >= 1). - * - * This takes advantage of the diamont exit rule, which says that every pixel - * has a diamond inside it touching the pixel boundary and only if a line exits - * the diamond, that pixel is filled. If a line enters the diamond or stays - * outside the diamond, the pixel isn't filled. - * - * This algorithm is a little simpler than that. The space outside all diamonds also - * has the same diamond shape, which we'll call corner diamonds. - * - * The idea is to cull all lines that are entirely inside a diamond, including - * corner diamonds. If a line is entirely inside a diamond, it can be culled because - * it doesn't exit it. If a line is entirely inside a corner diamond, it can be culled - * because it doesn't enter any diamond and thus can't exit any diamond. - * - * The viewport is rotated by 45 degress to turn diamonds into squares, and a bounding - * box test is used to determine whether a line is entirely inside any square (diamond). - * - * The line width doesn't matter. Wide lines only duplicate filled pixels in either X or - * Y direction from the filled pixels. MSAA also doesn't matter. MSAA should ideally use - * perpendicular end caps that enable quad rasterization for lines. Thus, this should - * always use non-MSAA viewport transformation and non-MSAA small prim precision. - * - * A good test is piglit/lineloop because it draws 10k subpixel lines in a circle. - * It should contain no holes if this matches hw behavior. - */ - LLVMValueRef v0[2], v1[2]; - - /* Get vertex positions in pixels. */ - for (unsigned chan = 0; chan < 2; chan++) { - v0[chan] = ac_build_fmad(ctx, pos[0][chan], vp_scale[chan], vp_translate[chan]); - v1[chan] = ac_build_fmad(ctx, pos[1][chan], vp_scale[chan], vp_translate[chan]); - } - - /* Rotate the viewport by 45 degress, so that diamonds become squares. */ - rotate_45degrees(ctx, v0); - rotate_45degrees(ctx, v1); - - LLVMValueRef not_equal[2]; - - for (unsigned chan = 0; chan < 2; chan++) { - /* The width of each square is sqrt(0.5), so scale it to 1 because we want - * round() to give us the position of the closest center of a square (diamond). - */ - v0[chan] = LLVMBuildFMul(builder, v0[chan], LLVMConstReal(ctx->f32, 1.414213562), ""); - v1[chan] = LLVMBuildFMul(builder, v1[chan], LLVMConstReal(ctx->f32, 1.414213562), ""); - - /* Compute the bounding box around both vertices. We do this because we must - * enlarge the line area by the precision of the rasterizer. - */ - LLVMValueRef min = ac_build_fmin(ctx, v0[chan], v1[chan]); - LLVMValueRef max = ac_build_fmax(ctx, v0[chan], v1[chan]); - - /* Enlarge the bounding box by the precision of the rasterizer. */ - min = LLVMBuildFSub(builder, min, small_prim_precision, ""); - max = LLVMBuildFAdd(builder, max, small_prim_precision, ""); - - /* Round the bounding box corners. If both rounded corners are equal, - * the bounding box is entirely inside a square (diamond). - */ - min = ac_build_round(ctx, min); - max = ac_build_round(ctx, max); - not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, ""); - } - - accepted = LLVMBuildAnd(builder, accepted, - LLVMBuildOr(builder, not_equal[0], not_equal[1], ""), ""); - } - - /* Disregard the bounding box culling if any W is negative because the code - * doesn't work with that. - */ - accepted = LLVMBuildOr(builder, accepted, w->any_w_negative, ""); - - if (accept_func) - accept_func(ctx, accepted, userdata); - } - ac_build_endif(ctx, 10000000); -} - -/** - * Return i1 true if the primitive is accepted (not culled). - * - * \param pos Vertex positions 3x vec4 - * \param initially_accepted AND'ed with the result. Some computations can be - * skipped if this is false. - * \param vp_scale Viewport scale XY. - * For MSAA, multiply them by the number of samples. - * \param vp_translate Viewport translation XY. - * For MSAA, multiply them by the number of samples. - * \param small_prim_precision Precision of small primitive culling. This should - * be the same as or greater than the precision of - * the rasterizer. Set to num_samples / 2^subpixel_bits. - * subpixel_bits are defined by the quantization mode. - * \param options See ac_cull_options. - * \param accept_func Callback invoked in the inner-most branch where the primitive is accepted. - */ -void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], - LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], - LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, - LLVMValueRef clip_half_line_width[2], struct ac_cull_options *options, - ac_cull_accept_func accept_func, void *userdata) -{ - struct ac_position_w_info w; - ac_analyze_position_w(ctx, pos, &w, options->num_vertices); - - /* W culling. */ - LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true; - accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, ""); - - /* Face culling. */ - accepted = LLVMBuildAnd( - ctx->builder, accepted, - ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area), - ""); - - /* View culling and small primitive elimination. */ - cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision, - clip_half_line_width, options, accept_func, userdata); -} diff --git a/src/amd/llvm/ac_llvm_cull.h b/src/amd/llvm/ac_llvm_cull.h deleted file mode 100644 index dc978d3fe04..00000000000 --- a/src/amd/llvm/ac_llvm_cull.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - */ - -#ifndef AC_LLVM_CULL_H -#define AC_LLVM_CULL_H - -#include "ac_llvm_build.h" - -struct ac_cull_options { - /* In general, I recommend setting all to true except view Z culling, - * which isn't so effective because W culling is cheaper and partially - * replaces near Z culling, and you don't need to set Position.z - * if Z culling is disabled. - * - * If something doesn't work, turn some of these off to find out what. - */ - bool cull_front; - bool cull_back; - bool cull_view_xy; - bool cull_view_near_z; - bool cull_view_far_z; - bool cull_small_prims; - bool cull_zero_area; - bool cull_w; /* cull primitives with all W < 0 */ - - bool use_halfz_clip_space; - - uint8_t num_vertices; /* 1..3 */ -}; - -/* Callback invoked in the inner-most branch where the primitive is accepted. */ -typedef void (*ac_cull_accept_func)(struct ac_llvm_context *ctx, LLVMValueRef accepted, - void *userdata); - -void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4], - LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2], - LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision, - LLVMValueRef clip_half_line_width[2], struct ac_cull_options *options, - ac_cull_accept_func accept_func, void *userdata); - -#endif diff --git a/src/amd/llvm/meson.build b/src/amd/llvm/meson.build index d52709eb671..183dc607fe6 100644 --- a/src/amd/llvm/meson.build +++ b/src/amd/llvm/meson.build @@ -21,8 +21,6 @@ amd_common_llvm_files = files( 'ac_llvm_build.c', 'ac_llvm_build.h', - 'ac_llvm_cull.c', - 'ac_llvm_cull.h', 'ac_llvm_helper.cpp', 'ac_llvm_util.c', 'ac_llvm_util.h',
