Mesa (master): radv: Update version to 1.1.70.
Module: Mesa Branch: master Commit: 68201ab2dae3fc8550e013ace110b77c2d1b94a9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=68201ab2dae3fc8550e013ace110b77c2d1b94a9 Author: Bas NieuwenhuizenDate: Fri Mar 9 00:49:57 2018 +0100 radv: Update version to 1.1.70. Turns out they did not reset the patch number on release. Reviewed-by: Dave Airlie --- src/amd/vulkan/radv_extensions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 469b09a160..bfee1f76fa 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -31,7 +31,7 @@ import xml.etree.cElementTree as et from mako.template import Template -MAX_API_VERSION = '1.1.0' +MAX_API_VERSION = '1.1.70' class Extension: def __init__(self, name, ext_version, enable): @@ -274,7 +274,7 @@ uint32_t radv_physical_device_api_version(struct radv_physical_device *dev) { if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit) -return VK_MAKE_VERSION(1, 1, 0); +return VK_MAKE_VERSION(1, 1, 70); return VK_MAKE_VERSION(1, 0, 68); } """) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Generate icd files.
Module: Mesa Branch: master Commit: b6347807a92d8fcf9acbaeb1af2f0aae15d78b3a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b6347807a92d8fcf9acbaeb1af2f0aae15d78b3a Author: Bas NieuwenhuizenDate: Fri Mar 9 00:47:26 2018 +0100 radv: Generate icd files. If the api version is too low, the loader clamps the application requested version to the advertized version, which messes with which extensions are enabled. Reviewed-by: Dave Airlie --- src/amd/vulkan/dev_icd.json.in| 7 -- src/amd/vulkan/meson.build| 34 +++- src/amd/vulkan/radeon_icd.json.in | 7 -- src/amd/vulkan/radv_icd.py| 47 +++ 4 files changed, 70 insertions(+), 25 deletions(-) diff --git a/src/amd/vulkan/dev_icd.json.in b/src/amd/vulkan/dev_icd.json.in deleted file mode 100644 index dfd032cdfb..00 --- a/src/amd/vulkan/dev_icd.json.in +++ /dev/null @@ -1,7 +0,0 @@ -{ -"file_format_version": "1.0.0", -"ICD": { -"library_path": "@libvulkan_radeon_path@", -"api_version": "1.1.0" -} -} diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 98051560a5..61aa8c4fde 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -134,18 +134,30 @@ libvulkan_radeon = shared_library( install : true, ) -radv_data = configuration_data() -radv_data.set('install_libdir', join_paths(get_option('prefix'), get_option('libdir'))) -radv_data.set('libvulkan_radeon_path', libvulkan_radeon.full_path()) - -configure_file( - configuration : radv_data, - input : 'radeon_icd.json.in', +radeon_icd = custom_target( + 'radeon_icd', + input : 'radv_icd.py', output : 'radeon_icd.@0@.json'.format(host_machine.cpu()), + command : [ +prog_python2, '@INPUT@', +'--lib-path', join_paths(get_option('prefix'), get_option('libdir')), +'--out', '@OUTPUT@', + ], + depend_files : files('radv_extensions.py'), + build_by_default : true, install_dir : with_vulkan_icd_dir, + install : true, ) -configure_file( - configuration : radv_data, - input : 'dev_icd.json.in', - output : 'dev_icd.json' + +radv_dev_icd = 
custom_target( + 'radv_dev_icd', + input : 'radv_icd.py', + output : 'dev_icd.json', + command : [ +prog_python2, '@INPUT@', '--lib-path', meson.current_build_dir(), +'--out', '@OUTPUT@' + ], + depend_files : files('radv_extensions.py'), + build_by_default : true, + install : false, ) diff --git a/src/amd/vulkan/radeon_icd.json.in b/src/amd/vulkan/radeon_icd.json.in deleted file mode 100644 index a99cb80ee9..00 --- a/src/amd/vulkan/radeon_icd.json.in +++ /dev/null @@ -1,7 +0,0 @@ -{ -"file_format_version": "1.0.0", -"ICD": { -"library_path": "@install_libdir@/libvulkan_radeon.so", -"api_version": "1.1.0" -} -} diff --git a/src/amd/vulkan/radv_icd.py b/src/amd/vulkan/radv_icd.py new file mode 100644 index 00..78ed379bd2 --- /dev/null +++ b/src/amd/vulkan/radv_icd.py @@ -0,0 +1,47 @@ +# Copyright 2017 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +import json +import os.path + +from radv_extensions import * + +if __name__ == '__main__': +parser = argparse.ArgumentParser() +parser.add_argument('--out', help='Output json file.', required=True) +parser.add_argument('--lib-path', help='Path to libvulkan_radeon.so') +args = parser.parse_args() + +path = 'libvulkan_radeon.so' +if args.lib_path: +path = os.path.join(args.lib_path, path) + +json_data = { +'file_format_version': '1.0.0', +'ICD': { +'library_path': path, +'api_version': str(MAX_API_VERSION), +}, +} + +with open(args.out, 'w') as f: +json.dump(json_data, f, indent = 4, sort_keys=True) ___ mesa-commit mailing
Mesa (master): i965/fs: Add infrastructure for generating CSEL instructions.
Module: Mesa Branch: master Commit: 70de61594dcf99f24eb31ebf98d62f13e1f44c2e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=70de61594dcf99f24eb31ebf98d62f13e1f44c2e Author: Kenneth GraunkeDate: Sun Nov 22 20:12:17 2015 -0800 i965/fs: Add infrastructure for generating CSEL instructions. v2 (idr): Don't allow CSEL with a non-float src2. v3 (idr): Add CSEL to fs_inst::flags_written. Suggested by Matt. v4 (idr): Only set BRW_ALIGN_16 on Gen < 10 (suggested by Matt). Don't reset the access mode afterwards (suggested by Samuel and Matt). Add support for CSEL not modifying the flags to more places (requested by Matt). Signed-off-by: Kenneth Graunke Signed-off-by: Ian Romanick Reviewed-by: Samuel Iglesias Gonsálvez [v3] Reviewed-by: Matt Turner --- src/intel/compiler/brw_disasm.c | 1 + src/intel/compiler/brw_eu.h | 1 + src/intel/compiler/brw_eu_emit.c| 1 + src/intel/compiler/brw_fs.cpp | 2 ++ src/intel/compiler/brw_fs_builder.h | 22 +- src/intel/compiler/brw_fs_generator.cpp | 6 ++ src/intel/compiler/brw_ir_vec4.h| 1 + src/intel/compiler/brw_vec4.cpp | 1 + 8 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index a9a108f8ac..5f75c67942 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -1508,6 +1508,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, */ if (brw_inst_cond_modifier(devinfo, inst) && (devinfo->gen < 6 || (opcode != BRW_OPCODE_SEL && +opcode != BRW_OPCODE_CSEL && opcode != BRW_OPCODE_IF && opcode != BRW_OPCODE_WHILE))) { format(file, ".f%"PRIu64, diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index a5f28d8fc6..ca72666a55 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -171,6 +171,7 @@ ALU2(SHR) ALU2(SHL) ALU1(DIM) ALU2(ASR) +ALU3(CSEL) ALU1(F32TO16) ALU1(F16TO32) ALU2(ADD) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 
f8102e014e..f039af56d0 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -959,6 +959,7 @@ ALU2(SHR) ALU2(SHL) ALU1(DIM) ALU2(ASR) +ALU3(CSEL) ALU1(FRC) ALU1(RNDD) ALU2(MAC) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 53ba94..02a8ea0fd9 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -945,6 +945,7 @@ unsigned fs_inst::flags_written() const { if ((conditional_mod && (opcode != BRW_OPCODE_SEL && +opcode != BRW_OPCODE_CSEL && opcode != BRW_OPCODE_IF && opcode != BRW_OPCODE_WHILE)) || opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS || @@ -5578,6 +5579,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "%s", conditional_modifier[inst->conditional_mod]); if (!inst->predicate && (devinfo->gen < 5 || (inst->opcode != BRW_OPCODE_SEL && +inst->opcode != BRW_OPCODE_CSEL && inst->opcode != BRW_OPCODE_IF && inst->opcode != BRW_OPCODE_WHILE))) { fprintf(file, ".f%d.%d", inst->flag_subreg / 2, diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index cf603b0c86..4203c8c27c 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -567,7 +567,6 @@ namespace brw { ALU1(BFREV) ALU1(CBIT) ALU2(CMPN) - ALU3(CSEL) ALU1(DIM) ALU2(DP2) ALU2(DP3) @@ -643,6 +642,27 @@ namespace brw { } /** + * CSEL: dst = src2 0.0f ? src0 : src1 + */ + instruction * + CSEL(const dst_reg , const src_reg , const src_reg , + const src_reg , brw_conditional_mod condition) const + { + /* CSEL only operates on floats, so we can't do integer =/> + * comparisons. Zero/non-zero (== and !=) comparisons almost work. + * 0x8000 fails because it is -0.0, and -0.0 == 0.0. 
+ */ + assert(src2.type == BRW_REGISTER_TYPE_F); + + return set_condmod(condition, +emit(BRW_OPCODE_CSEL, + retype(dst, BRW_REGISTER_TYPE_F), + retype(src0, BRW_REGISTER_TYPE_F), + retype(src1, BRW_REGISTER_TYPE_F), + src2)); + } + + /** * Emit a linear interpolation instruction. */
Mesa (master): i965/vec4: Allow CSE on subset VF constant loads
Module: Mesa Branch: master Commit: 1583f49eaae0292eba1a04e67125bb4b92b33b0a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1583f49eaae0292eba1a04e67125bb4b92b33b0a Author: Ian RomanickDate: Fri Feb 16 17:33:13 2018 -0800 i965/vec4: Allow CSE on subset VF constant loads v2: Rewrite the code that generates the VF mask. Suggested by Ken. No changes on other platforms. Haswell, Ivy Bridge, and Sandy Bridge had similar results. (Haswell shown) total instructions in shared programs: 13059891 -> 13059884 (<.01%) instructions in affected programs: 431 -> 424 (-1.62%) helped: 7 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 1.19% max: 5.26% x̄: 2.05% x̃: 1.49% 95% mean confidence interval for instructions value: -1.00 -1.00 95% mean confidence interval for instructions %-change: -3.39% -0.71% Instructions are helped. total cycles in shared programs: 409260032 -> 409260018 (<.01%) cycles in affected programs: 4228 -> 4214 (-0.33%) helped: 7 HURT: 0 helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 helped stats (rel) min: 0.28% max: 2.04% x̄: 0.54% x̃: 0.28% 95% mean confidence interval for cycles value: -2.00 -2.00 95% mean confidence interval for cycles %-change: -1.15% 0.07% Inconclusive result (%-change mean confidence interval includes 0). 
Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_vec4_cse.cpp | 19 +++ 1 file changed, 19 insertions(+) diff --git a/src/intel/compiler/brw_vec4_cse.cpp b/src/intel/compiler/brw_vec4_cse.cpp index d9f08c9631..c9cf54c6f7 100644 --- a/src/intel/compiler/brw_vec4_cse.cpp +++ b/src/intel/compiler/brw_vec4_cse.cpp @@ -104,6 +104,25 @@ operands_match(const vec4_instruction *a, const vec4_instruction *b) return xs[0].equals(ys[0]) && ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) || (xs[2].equals(ys[1]) && xs[1].equals(ys[2]))); + } else if (a->opcode == BRW_OPCODE_MOV && + xs[0].file == IMM && + xs[0].type == BRW_REGISTER_TYPE_VF) { + src_reg tmp_x = xs[0]; + src_reg tmp_y = ys[0]; + + /* Smash out the values that are not part of the writemask. Otherwise + * the equals operator will fail due to mismatches in unused components. + */ + const unsigned ab_writemask = a->dst.writemask & b->dst.writemask; + const uint32_t mask = ((ab_writemask & WRITEMASK_X) ? 0x000000ff : 0) | +((ab_writemask & WRITEMASK_Y) ? 0x0000ff00 : 0) | +((ab_writemask & WRITEMASK_Z) ? 0x00ff0000 : 0) | +((ab_writemask & WRITEMASK_W) ? 0xff000000 : 0); + + tmp_x.ud &= mask; + tmp_y.ud &= mask; + + return tmp_x.equals(tmp_y); } else if (!a->is_commutative()) { return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]); } else { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): nir: Don't i2b a value that is already Boolean
Module: Mesa Branch: master Commit: 6878c9aabc6077e6de75b269fdfb8ff423d05042 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6878c9aabc6077e6de75b269fdfb8ff423d05042 Author: Ian RomanickDate: Wed Feb 21 18:15:52 2018 -0800 nir: Don't i2b a value that is already Boolean A bunch of shaders have sequences like: i2b(u2i(floatBitsToUint(intBitsToFloat(x == y ? -1 : 0 Other optimizations (and NIR's typeless nature) reduce this to i2b(x == y) which is silly. Skylake total instructions in shared programs: 14498698 -> 14497948 (<.01%) instructions in affected programs: 74480 -> 73730 (-1.01%) helped: 277 HURT: 0 helped stats (abs) min: 1 max: 32 x̄: 2.71 x̃: 2 helped stats (rel) min: 0.04% max: 13.79% x̄: 1.45% x̃: 0.68% 95% mean confidence interval for instructions value: -3.35 -2.06 95% mean confidence interval for instructions %-change: -1.74% -1.16% Instructions are helped. total cycles in shared programs: 532015500 -> 531999238 (<.01%) cycles in affected programs: 5943878 -> 5927616 (-0.27%) helped: 251 HURT: 74 helped stats (abs) min: 1 max: 13149 x̄: 127.89 x̃: 14 helped stats (rel) min: 0.01% max: 17.31% x̄: 1.55% x̃: 0.53% HURT stats (abs) min: 1 max: 4550 x̄: 214.04 x̃: 15 HURT stats (rel) min: <.01% max: 44.43% x̄: 2.81% x̃: 0.33% 95% mean confidence interval for cycles value: -158.51 58.43 95% mean confidence interval for cycles %-change: -1.07% -0.04% Inconclusive result (value mean confidence interval includes 0). total loops in shared programs: 4753 -> 4735 (-0.38%) loops in affected programs: 18 -> 0 helped: 18 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% 95% mean confidence interval for loops value: -1.00 -1.00 95% mean confidence interval for loops %-change: -100.00% -100.00% Loops are helped. Haswell and Broadwell had simliar results. 
(Broadwell shown) total instructions in shared programs: 14791877 -> 14791127 (<.01%) instructions in affected programs: 77326 -> 76576 (-0.97%) helped: 278 HURT: 1 helped stats (abs) min: 1 max: 32 x̄: 2.70 x̃: 2 helped stats (rel) min: 0.04% max: 13.79% x̄: 1.42% x̃: 0.68% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.49% max: 0.49% x̄: 0.49% x̃: 0.49% 95% mean confidence interval for instructions value: -3.33 -2.05 95% mean confidence interval for instructions %-change: -1.70% -1.13% Instructions are helped. total cycles in shared programs: 558250067 -> 558252872 (<.01%) cycles in affected programs: 5806328 -> 5809133 (0.05%) helped: 235 HURT: 83 helped stats (abs) min: 1 max: 10630 x̄: 81.73 x̃: 16 helped stats (rel) min: 0.03% max: 18.58% x̄: 1.60% x̃: 0.51% HURT stats (abs) min: 1 max: 10590 x̄: 265.19 x̃: 20 HURT stats (rel) min: <.01% max: 15.28% x̄: 1.89% x̃: 0.54% 95% mean confidence interval for cycles value: -89.87 107.51 95% mean confidence interval for cycles %-change: -1.06% -0.32% Inconclusive result (value mean confidence interval includes 0). total loops in shared programs: 4735 -> 4717 (-0.38%) loops in affected programs: 18 -> 0 helped: 18 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% 95% mean confidence interval for loops value: -1.00 -1.00 95% mean confidence interval for loops %-change: -100.00% -100.00% Loops are helped. 
total fills in shared programs: 83111 -> 83110 (<.01%) fills in affected programs: 28 -> 27 (-3.57%) helped: 1 HURT: 0 Ivy Bridge total instructions in shared programs: 11774173 -> 11773436 (<.01%) instructions in affected programs: 70819 -> 70082 (-1.04%) helped: 267 HURT: 0 helped stats (abs) min: 1 max: 48 x̄: 2.76 x̃: 2 helped stats (rel) min: 0.21% max: 19.51% x̄: 1.57% x̃: 0.63% 95% mean confidence interval for instructions value: -3.51 -2.01 95% mean confidence interval for instructions %-change: -1.94% -1.21% Instructions are helped. total cycles in shared programs: 257153833 -> 257148932 (<.01%) cycles in affected programs: 585341 -> 580440 (-0.84%) helped: 167 HURT: 100 helped stats (abs) min: 1 max: 1327 x̄: 44.89 x̃: 16 helped stats (rel) min: 0.04% max: 26.54% x̄: 2.41% x̃: 0.88% HURT stats (abs) min: 1 max: 200 x̄: 25.95 x̃: 16 HURT stats (rel) min: 0.04% max: 9.81% x̄: 1.34% x̃: 0.65% 95% mean confidence interval for cycles value: -33.25 -3.46 95% mean confidence interval for cycles %-change: -1.47% -0.54% Cycles are helped. total loops in shared programs: 3416 -> 3398 (-0.53%) loops in affected programs: 18 -> 0 helped: 18 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% 95% mean confidence interval for loops value: -1.00 -1.00 95% mean confidence interval for loops %-change: -100.00% -100.00% Loops are helped. LOST: 2 GAINED: 0 Sandy Bridge total instructions in shared programs: 10499306 -> 10499094 (<.01%) instructions in affected programs: 6051 -> 5839 (-3.50%)
Mesa (master): nir: Narrow some dot product operations
Module: Mesa Branch: master Commit: 54e8d2268de37f320b2d206295d0b519f5be5ab7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=54e8d2268de37f320b2d206295d0b519f5be5ab7 Author: Ian RomanickDate: Thu Feb 15 14:49:55 2018 -0800 nir: Narrow some dot product operations On vector platforms, this helps elide some constant loads. v2: Reorder the transformations. No changes on Broadwell or Skylake. Haswell total instructions in shared programs: 13093793 -> 13060163 (-0.26%) instructions in affected programs: 1277532 -> 1243902 (-2.63%) helped: 13216 HURT: 95 helped stats (abs) min: 1 max: 18 x̄: 2.56 x̃: 2 helped stats (rel) min: 0.21% max: 20.00% x̄: 3.63% x̃: 2.78% HURT stats (abs) min: 1 max: 6 x̄: 1.77 x̃: 1 HURT stats (rel) min: 0.09% max: 5.56% x̄: 1.25% x̃: 1.19% 95% mean confidence interval for instructions value: -2.57 -2.49 95% mean confidence interval for instructions %-change: -3.65% -3.54% Instructions are helped. total cycles in shared programs: 409580819 -> 409268463 (-0.08%) cycles in affected programs: 71730652 -> 71418296 (-0.44%) helped: 9898 HURT: 2352 helped stats (abs) min: 2 max: 16014 x̄: 37.08 x̃: 16 helped stats (rel) min: <.01% max: 35.55% x̄: 6.26% x̃: 4.50% HURT stats (abs) min: 2 max: 276 x̄: 23.25 x̃: 6 HURT stats (rel) min: <.01% max: 40.00% x̄: 3.54% x̃: 1.97% 95% mean confidence interval for cycles value: -33.19 -17.80 95% mean confidence interval for cycles %-change: -4.50% -4.26% Cycles are helped. 
total fills in shared programs: 82059 -> 82052 (<.01%) fills in affected programs: 21 -> 14 (-33.33%) helped: 7 HURT: 0 Sandy Bridge and Ivy Bridge had similar results (Ivy Bridge shown) total instructions in shared programs: 11811851 -> 11780605 (-0.26%) instructions in affected programs: 1155007 -> 1123761 (-2.71%) helped: 12304 HURT: 95 helped stats (abs) min: 1 max: 18 x̄: 2.55 x̃: 2 helped stats (rel) min: 0.21% max: 20.00% x̄: 3.69% x̃: 2.86% HURT stats (abs) min: 1 max: 6 x̄: 1.77 x̃: 1 HURT stats (rel) min: 0.09% max: 5.56% x̄: 1.25% x̃: 1.19% 95% mean confidence interval for instructions value: -2.56 -2.48 95% mean confidence interval for instructions %-change: -3.71% -3.59% Instructions are helped. total cycles in shared programs: 257618409 -> 257316805 (-0.12%) cycles in affected programs: 71999580 -> 71697976 (-0.42%) helped: 9155 HURT: 2380 helped stats (abs) min: 2 max: 16014 x̄: 38.44 x̃: 16 helped stats (rel) min: <.01% max: 35.75% x̄: 6.39% x̃: 4.62% HURT stats (abs) min: 2 max: 290 x̄: 21.14 x̃: 4 HURT stats (rel) min: <.01% max: 41.55% x̄: 3.14% x̃: 1.33% 95% mean confidence interval for cycles value: -34.32 -17.97 95% mean confidence interval for cycles %-change: -4.55% -4.29% Cycles are helped. GM45 and Iron Lake had nearly identical results (Iron Lake shown) total instructions in shared programs: 7886750 -> 7879944 (-0.09%) instructions in affected programs: 373781 -> 366975 (-1.82%) helped: 3715 HURT: 47 helped stats (abs) min: 1 max: 8 x̄: 1.86 x̃: 1 helped stats (rel) min: 0.22% max: 16.67% x̄: 2.88% x̃: 2.06% HURT stats (abs) min: 1 max: 6 x̄: 2.55 x̃: 2 HURT stats (rel) min: 1.09% max: 5.00% x̄: 1.93% x̃: 2.35% 95% mean confidence interval for instructions value: -1.85 -1.77 95% mean confidence interval for instructions %-change: -2.91% -2.73% Instructions are helped. 
total cycles in shared programs: 178114636 -> 178095452 (-0.01%) cycles in affected programs: 7227666 -> 7208482 (-0.27%) helped: 3349 HURT: 301 helped stats (abs) min: 2 max: 90 x̄: 6.55 x̃: 4 helped stats (rel) min: <.01% max: 14.18% x̄: 0.95% x̃: 0.63% HURT stats (abs) min: 2 max: 42 x̄: 9.13 x̃: 10 HURT stats (rel) min: 0.01% max: 11.19% x̄: 1.22% x̃: 1.50% 95% mean confidence interval for cycles value: -5.52 -4.99 95% mean confidence interval for cycles %-change: -0.81% -0.73% Cycles are helped. Signed-off-by: Ian Romanick Reviewed-by: Samuel Iglesias Gonsálvez [v1] --- src/compiler/nir/nir_opt_algebraic.py | 8 1 file changed, 8 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index e500a31d8e..c42b72eedd 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -125,6 +125,14 @@ optimizations = [ (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'), + (('fdot4', ('vec4', a, b, c, 1.0), d), ('fdph', ('vec3', a, b, c), d)), + (('fdot4', ('vec4', a, 0.0, 0.0, 0.0), b), ('fmul', a, b)), + (('fdot4', ('vec4', a, b, 0.0, 0.0), c), ('fdot2', ('vec2', a, b), c)), + (('fdot4', ('vec4', a, b, c, 0.0), d), ('fdot3', ('vec3', a, b, c), d)), + + (('fdot3', ('vec3', a, 0.0, 0.0), b), ('fmul', a, b)), + (('fdot3', ('vec3', a, b, 0.0), c), ('fdot2', ('vec2', a, b), c)), + # (a * #b + #c) << #d # ((a * #b) << #d) + (#c << #d) # (a * (#b
Mesa (master): i965/fs: Merge CMP and SEL into CSEL on Gen8+
Module: Mesa Branch: master Commit: 52c7df1643ec9af119fd66f916f7fbdbcc798d2d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=52c7df1643ec9af119fd66f916f7fbdbcc798d2d Author: Ian RomanickDate: Wed Feb 21 18:06:56 2018 -0800 i965/fs: Merge CMP and SEL into CSEL on Gen8+ v2: Fix several problems handling inverted predicates. Add a much bigger comment around the BRW_CONDITIONAL_NZ case. v3: Allow uniforms and shader inputs as sources for the original SEL and CMP instructions. This enables a LOT more shaders to receive CSEL merging (5816 vs 8564 on SKL). v4: Report progress. Broadwell and Skylake had similar results. (Broadwell shown) helped: 8527 HURT: 0 helped stats (abs) min: 1 max: 27 x̄: 2.44 x̃: 1 helped stats (rel) min: 0.03% max: 17.80% x̄: 1.12% x̃: 0.70% 95% mean confidence interval for instructions value: -2.51 -2.36 95% mean confidence interval for instructions %-change: -1.15% -1.10% Instructions are helped. total cycles in shared programs: 559442317 -> 558288357 (-0.21%) cycles in affected programs: 372699860 -> 371545900 (-0.31%) helped: 6748 HURT: 1450 helped stats (abs) min: 1 max: 32000 x̄: 182.41 x̃: 12 helped stats (rel) min: <.01% max: 66.08% x̄: 3.42% x̃: 0.70% HURT stats (abs) min: 1 max: 2538 x̄: 53.08 x̃: 14 HURT stats (rel) min: <.01% max: 96.72% x̄: 3.32% x̃: 0.90% 95% mean confidence interval for cycles value: -179.01 -102.51 95% mean confidence interval for cycles %-change: -2.37% -2.08% Cycles are helped. LOST: 0 GAINED: 6 No changes on earlier platforms. 
Signed-off-by: Ian Romanick Reviewed-by: Samuel Iglesias Gonsálvez [v1] Reviewed-by: Kenneth Graunke [v3] Reviewed-by: Matt Turner --- src/intel/compiler/brw_fs.cpp | 106 ++ src/intel/compiler/brw_fs.h | 1 + 2 files changed, 107 insertions(+) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 02a8ea0fd9..422eedcf0a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2844,6 +2844,106 @@ mask_relative_to(const fs_reg , const fs_reg , unsigned ds) } bool +fs_visitor::opt_peephole_csel() +{ + if (devinfo->gen < 8) + return false; + + bool progress = false; + + foreach_block_reverse(block, cfg) { + int ip = block->end_ip + 1; + + foreach_inst_in_block_reverse_safe(fs_inst, inst, block) { + ip--; + + if (inst->opcode != BRW_OPCODE_SEL || + inst->predicate != BRW_PREDICATE_NORMAL || + (inst->dst.type != BRW_REGISTER_TYPE_F && + inst->dst.type != BRW_REGISTER_TYPE_D && + inst->dst.type != BRW_REGISTER_TYPE_UD)) +continue; + + /* Because it is a 3-src instruction, CSEL cannot have an immediate + * value as a source, but we can sometimes handle zero. + */ + if ((inst->src[0].file != VGRF && inst->src[0].file != ATTR && + inst->src[0].file != UNIFORM) || + (inst->src[1].file != VGRF && inst->src[1].file != ATTR && + inst->src[1].file != UNIFORM && !inst->src[1].is_zero())) +continue; + + foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { +if (!scan_inst->flags_written()) + continue; + +if ((scan_inst->opcode != BRW_OPCODE_CMP && + scan_inst->opcode != BRW_OPCODE_MOV) || +scan_inst->predicate != BRW_PREDICATE_NONE || +(scan_inst->src[0].file != VGRF && + scan_inst->src[0].file != ATTR && + scan_inst->src[0].file != UNIFORM) || +scan_inst->src[0].type != BRW_REGISTER_TYPE_F) + break; + +if (scan_inst->opcode == BRW_OPCODE_CMP && !scan_inst->src[1].is_zero()) + break; + +const brw::fs_builder ibld(this, block, inst); + +const enum brw_conditional_mod cond = + inst->predicate_inverse + ? 
brw_negate_cmod(scan_inst->conditional_mod) + : scan_inst->conditional_mod; + +fs_inst *csel_inst = NULL; + +if (inst->src[1].file != IMM) { + csel_inst = ibld.CSEL(inst->dst, + inst->src[0], + inst->src[1], + scan_inst->src[0], + cond); +} else if (cond == BRW_CONDITIONAL_NZ) { + /* Consider the sequence +* +* cmp.nz.f0 null<1>F g3<8,8,1>F 0F +* (+f0) sel g124<1>UD g2<8,8,1>UD 0xUD +* +* The sel will pick the immediate value 0 if r0 is ±0.0. +* Therefore, this sequence is equivalent: +* +* cmp.nz.f0
Mesa (master): i965/vec4: Relax writemask condition in CSE
Module: Mesa Branch: master Commit: 360899d4577a2431dc73b5c702d60ac6bd59ca07 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=360899d4577a2431dc73b5c702d60ac6bd59ca07 Author: Ian Romanick Date: Fri Feb 16 17:26:11 2018 -0800 i965/vec4: Relax writemask condition in CSE If the previously seen instruction generates more fields than the new instruction, still allow CSE to happen. This doesn't do much, but it also enables a couple more shaders in the next patch. It helped quite a bit in another change series that I have (at least for now) abandoned. v2: Add some extra commentary about the parameters to instructions_match. Suggested by Ken. No changes on Skylake, Broadwell, Iron Lake or GM45. Ivy Bridge and Haswell had similar results. (Ivy Bridge shown) total instructions in shared programs: 11780295 -> 11780294 (<.01%) instructions in affected programs: 302 -> 301 (-0.33%) helped: 1 HURT: 0 total cycles in shared programs: 257308315 -> 257308313 (<.01%) cycles in affected programs: 2074 -> 2072 (-0.10%) helped: 1 HURT: 0 Sandy Bridge total instructions in shared programs: 10506687 -> 10506686 (<.01%) instructions in affected programs: 335 -> 334 (-0.30%) helped: 1 HURT: 0 Signed-off-by: Ian Romanick Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_vec4_cse.cpp | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_vec4_cse.cpp b/src/intel/compiler/brw_vec4_cse.cpp index 2e65ef7854..d9f08c9631 100644 --- a/src/intel/compiler/brw_vec4_cse.cpp +++ b/src/intel/compiler/brw_vec4_cse.cpp @@ -112,6 +112,14 @@ operands_match(const vec4_instruction *a, const vec4_instruction *b) } } +/** + * Checks if instructions match, exactly for sources, but loosely for + * destination writemasks. + * + * \param 'a' is the generating expression from the AEB entry. + * \param 'b' is the second occurrence of the expression that we're + *considering eliminating.
+ */ static bool instructions_match(vec4_instruction *a, vec4_instruction *b) { @@ -127,7 +135,7 @@ instructions_match(vec4_instruction *a, vec4_instruction *b) a->base_mrf == b->base_mrf && a->header_size == b->header_size && a->shadow_compare == b->shadow_compare && - a->dst.writemask == b->dst.writemask && + ((a->dst.writemask & b->dst.writemask) == a->dst.writemask) && a->force_writemask_all == b->force_writemask_all && a->size_written == b->size_written && a->exec_size == b->exec_size && ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: perf: count number of accumulated reports
Module: Mesa Branch: master Commit: fb921a2870ae51cdad129438dfb1b20f1538b2fa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fb921a2870ae51cdad129438dfb1b20f1538b2fa Author: Lionel Landwerlin Date: Tue Mar 6 17:11:56 2018 +0000 i965: perf: count number of accumulated reports This will be reused later. Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_performance_query.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index d0faf4a2cb..71ea26753e 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -290,6 +290,10 @@ struct brw_perf_query_object */ bool results_accumulated; + /** + * Number of reports accumulated to produce the results. + */ + uint32_t reports_accumulated; } oa; struct { @@ -658,6 +662,8 @@ add_deltas(struct brw_context *brw, int idx = 0; int i; + obj->oa.reports_accumulated++; + switch (query->oa_format) { case I915_OA_FORMAT_A32u40_A4u32_B8_C8: accumulate_uint32(start + 1, end + 1, accumulator + idx++); /* timestamp */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: perf: consolidate unmapping oa perf bo outside accumulation
Module: Mesa Branch: master Commit: d10a39ebe085dc28ab7352f76b57d628928e1e40 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d10a39ebe085dc28ab7352f76b57d628928e1e40 Author: Lionel LandwerlinDate: Wed Mar 7 14:10:15 2018 + i965: perf: consolidate unmapping oa perf bo outside accumulation Do this in one place outside the only caller of the accumulation function. Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_performance_query.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 71ea26753e..13eff31ee6 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -1014,8 +1014,6 @@ end: DBG("Marking %d accumulated - results gathered\n", o->Id); - brw_bo_unmap(obj->oa.bo); - obj->oa.map = NULL; obj->oa.results_accumulated = true; drop_from_unaccumulated_query_list(brw, obj); dec_n_oa_users(brw); @@ -1024,8 +1022,6 @@ end: error: - brw_bo_unmap(obj->oa.bo); - obj->oa.map = NULL; discard_all_queries(brw); } @@ -1470,6 +1466,9 @@ get_oa_counter_data(struct brw_context *brw, if (!obj->oa.results_accumulated) { accumulate_oa_reports(brw, obj); assert(obj->oa.results_accumulated); + + brw_bo_unmap(obj->oa.bo); + obj->oa.map = NULL; } for (int i = 0; i < n_counters; i++) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: perf: store sysfs device entry into context
Module: Mesa Branch: master Commit: b71da26496cad8179fce93e5b114bef2cddce987 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b71da26496cad8179fce93e5b114bef2cddce987 Author: Lionel LandwerlinDate: Wed Feb 7 18:09:58 2018 + i965: perf: store sysfs device entry into context We want to reuse it later on. Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_context.h | 3 + src/mesa/drivers/dri/i965/brw_performance_query.c | 146 +++--- 2 files changed, 73 insertions(+), 76 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d6e3c7807f..d3e7c71207 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1189,6 +1189,9 @@ struct brw_context */ struct hash_table *oa_metrics_table; + /* Location of the device's sysfs entry. */ + char sysfs_dev_dir[256]; + struct brw_perf_query_info *queries; int n_queries; diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 622c2d2d95..a084b30fe7 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -319,6 +319,47 @@ brw_perf_query(struct gl_perf_query_object *o) /**/ static bool +read_file_uint64(const char *file, uint64_t *val) +{ +char buf[32]; +int fd, n; + +fd = open(file, 0); +if (fd < 0) + return false; +while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 && + errno == EINTR); +close(fd); +if (n < 0) + return false; + +buf[n] = '\0'; +*val = strtoull(buf, NULL, 0); + +return true; +} + +static bool +read_sysfs_drm_device_file_uint64(struct brw_context *brw, + const char *file, + uint64_t *value) +{ + char buf[512]; + int len; + + len = snprintf(buf, sizeof(buf), "%s/%s", + brw->perfquery.sysfs_dev_dir, file); + if (len < 0 || len >= sizeof(buf)) { + DBG("Failed to concatenate sys filename to read u64 from\n"); + return 
false; + } + + return read_file_uint64(buf, value); +} + +/**/ + +static bool brw_is_perf_query_ready(struct gl_context *ctx, struct gl_perf_query_object *o); @@ -1746,27 +1787,6 @@ init_pipeline_statistic_query_registers(struct brw_context *brw) query->data_size = sizeof(uint64_t) * query->n_counters; } -static bool -read_file_uint64(const char *file, uint64_t *val) -{ -char buf[32]; -int fd, n; - -fd = open(file, 0); -if (fd < 0) - return false; -while ((n = read(fd, buf, sizeof (buf) - 1)) < 0 && - errno == EINTR); -close(fd); -if (n < 0) - return false; - -buf[n] = '\0'; -*val = strtoull(buf, NULL, 0); - -return true; -} - static void register_oa_config(struct brw_context *brw, const struct brw_perf_query_info *query, @@ -1780,14 +1800,14 @@ register_oa_config(struct brw_context *brw, } static void -enumerate_sysfs_metrics(struct brw_context *brw, const char *sysfs_dev_dir) +enumerate_sysfs_metrics(struct brw_context *brw) { char buf[256]; DIR *metricsdir = NULL; struct dirent *metric_entry; int len; - len = snprintf(buf, sizeof(buf), "%s/metrics", sysfs_dev_dir); + len = snprintf(buf, sizeof(buf), "%s/metrics", brw->perfquery.sysfs_dev_dir); if (len < 0 || len >= sizeof(buf)) { DBG("Failed to concatenate path to sysfs metrics/ directory\n"); return; @@ -1814,7 +1834,7 @@ enumerate_sysfs_metrics(struct brw_context *brw, const char *sysfs_dev_dir) uint64_t id; len = snprintf(buf, sizeof(buf), "%s/metrics/%s/id", -sysfs_dev_dir, metric_entry->d_name); +brw->perfquery.sysfs_dev_dir, metric_entry->d_name); if (len < 0 || len >= sizeof(buf)) { DBG("Failed to concatenate path to sysfs metric id file\n"); continue; @@ -1834,37 +1854,18 @@ enumerate_sysfs_metrics(struct brw_context *brw, const char *sysfs_dev_dir) } static bool -read_sysfs_drm_device_file_uint64(struct brw_context *brw, - const char *sysfs_dev_dir, - const char *file, - uint64_t *value) -{ - char buf[512]; - int len; - - len = snprintf(buf, sizeof(buf), "%s/%s", sysfs_dev_dir, file); - if (len < 0 || 
len >= sizeof(buf)) { - DBG("Failed to concatenate sys filename to read u64
Mesa (master): i965: perf: default case for unknown query types
Module: Mesa Branch: master Commit: 80cd669a320fa0666d50f8427980401136f8f667 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=80cd669a320fa0666d50f8427980401136f8f667 Author: Lionel LandwerlinDate: Tue Feb 6 17:29:32 2018 + i965: perf: default case for unknown query types Just some extra safety before further changes. Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_performance_query.c | 32 ++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 613e61653f..8cb9d8277a 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -341,6 +341,9 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"), obj->pipeline_stats.bo ? "yes" : "no"); break; + default: + unreachable("Unknown query type"); + break; } } @@ -437,6 +440,10 @@ brw_get_perf_query_info(struct gl_context *ctx, case PIPELINE_STATS: *n_active = brw->perfquery.n_active_pipeline_stats_queries; break; + + default: + unreachable("Unknown query type"); + break; } } @@ -1265,6 +1272,10 @@ brw_begin_perf_query(struct gl_context *ctx, ++brw->perfquery.n_active_pipeline_stats_queries; break; + + default: + unreachable("Unknown query type"); + break; } if (INTEL_DEBUG & DEBUG_PERFMON) @@ -1321,6 +1332,10 @@ brw_end_perf_query(struct gl_context *ctx, STATS_BO_END_OFFSET_BYTES); --brw->perfquery.n_active_pipeline_stats_queries; break; + + default: + unreachable("Unknown query type"); + break; } } @@ -1341,6 +1356,10 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) case PIPELINE_STATS: bo = obj->pipeline_stats.bo; break; + + default: + unreachable("Unknown query type"); + break; } if (bo == NULL) @@ -1386,9 +1405,12 @@ brw_is_perf_query_ready(struct gl_context 
*ctx, return (obj->pipeline_stats.bo && !brw_batch_references(>batch, obj->pipeline_stats.bo) && !brw_bo_busy(obj->pipeline_stats.bo)); + + default: + unreachable("Unknown query type"); + break; } - unreachable("missing ready check for unknown query kind"); return false; } @@ -1502,6 +1524,10 @@ brw_get_perf_query_data(struct gl_context *ctx, case PIPELINE_STATS: written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data); break; + + default: + unreachable("Unknown query type"); + break; } if (bytes_written) @@ -1567,6 +1593,10 @@ brw_delete_perf_query(struct gl_context *ctx, obj->pipeline_stats.bo = NULL; } break; + + default: + unreachable("Unknown query type"); + break; } free(obj); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: perf: reuse timescale base function from query
Module: Mesa Branch: master Commit: e4387faafb1455b92d5e9620df9754cae1cd07e8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e4387faafb1455b92d5e9620df9754cae1cd07e8 Author: Lionel LandwerlinDate: Tue Mar 6 15:47:00 2018 + i965: perf: reuse timescale base function from query We already have the same function in brw_queryobj.c Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_performance_query.c | 13 ++--- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index a084b30fe7..d0faf4a2cb 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -613,15 +613,6 @@ drop_from_unaccumulated_query_list(struct brw_context *brw, reap_old_sample_buffers(brw); } -static uint64_t -timebase_scale(struct brw_context *brw, uint32_t u32_time_delta) -{ - const struct gen_device_info *devinfo = >screen->devinfo; - uint64_t tmp = ((uint64_t)u32_time_delta) * 10ull; - - return tmp ? tmp / devinfo->timestamp_frequency : 0; -} - static void accumulate_uint32(const uint32_t *report0, const uint32_t *report1, @@ -943,13 +934,13 @@ accumulate_oa_reports(struct brw_context *brw, /* Ignore reports that come before the start marker. * (Note: takes care to allow overflow of 32bit timestamps) */ -if (timebase_scale(brw, report[1] - start[1]) > 50) +if (brw_timebase_scale(brw, report[1] - start[1]) > 50) continue; /* Ignore reports that come after the end marker. * (Note: takes care to allow overflow of 32bit timestamps) */ -if (timebase_scale(brw, report[1] - end[1]) <= 50) +if (brw_timebase_scale(brw, report[1] - end[1]) <= 50) goto end; /* For Gen8+ since the counters continue while other ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: perf: store the hw_id of the context in the query
Module: Mesa Branch: master Commit: 5742b17da1f067aad592176e787abcb0bbfb0ebe URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5742b17da1f067aad592176e787abcb0bbfb0ebe Author: Lionel LandwerlinDate: Wed Feb 7 18:10:57 2018 + i965: perf: store the hw_id of the context in the query Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_performance_query.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 8cb9d8277a..622c2d2d95 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -279,6 +279,11 @@ struct brw_perf_query_object uint64_t accumulator[MAX_OA_REPORT_COUNTERS]; /** + * Hw ID used by the context on which the query was running. + */ + uint32_t hw_id; + + /** * false while in the unaccumulated_elements list, and set to * true when the final, end MI_RPC snapshot has been * accumulated. @@ -844,7 +849,6 @@ accumulate_oa_reports(struct brw_context *brw, uint32_t *end; struct exec_node *first_samples_node; bool in_ctx = true; - uint32_t ctx_id; int out_duration = 0; assert(o->Ready); @@ -862,7 +866,7 @@ accumulate_oa_reports(struct brw_context *brw, goto error; } - ctx_id = start[2]; + obj->oa.hw_id = start[2]; /* See if we have any periodic reports to accumulate too... */ @@ -917,11 +921,11 @@ accumulate_oa_reports(struct brw_context *brw, * of OA counters while any other context is acctive. 
*/ if (devinfo->gen >= 8) { - if (in_ctx && report[2] != ctx_id) { + if (in_ctx && report[2] != obj->oa.hw_id) { DBG("i915 perf: Switch AWAY (observed by ID change)\n"); in_ctx = false; out_duration = 0; - } else if (in_ctx == false && report[2] == ctx_id) { + } else if (in_ctx == false && report[2] == obj->oa.hw_id) { DBG("i915 perf: Switch TO\n"); in_ctx = true; @@ -938,10 +942,10 @@ accumulate_oa_reports(struct brw_context *brw, if (out_duration >= 1) add = false; } else if (in_ctx) { - assert(report[2] == ctx_id); + assert(report[2] == obj->oa.hw_id); DBG("i915 perf: Continuation IN\n"); } else { - assert(report[2] != ctx_id); + assert(report[2] != obj->oa.hw_id); DBG("i915 perf: Continuation OUT\n"); add = false; out_duration++; @@ -1251,6 +1255,7 @@ brw_begin_perf_query(struct gl_context *ctx, */ buf->refcount++; + obj->oa.hw_id = 0x; memset(obj->oa.accumulator, 0, sizeof(obj->oa.accumulator)); obj->oa.results_accumulated = false; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: expand constbuf 0 address correctly to fix Vega10 hangs
Module: Mesa Branch: master Commit: 35cd86d4e999149dcb51585c0e2a3a50a74c7bcb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=35cd86d4e999149dcb51585c0e2a3a50a74c7bcb Author: Marek OlšákDate: Wed Mar 7 11:36:26 2018 -0500 radeonsi: expand constbuf 0 address correctly to fix Vega10 hangs This is only required with the latest libdrm. This fixes 32-bit support with high addresses. (and possibly 64-bit support too because the high bits need to be masked out) Acked-by: Christian König Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_shader.c | 21 + 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fa7a19cb3a..95258b74f7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2427,12 +2427,25 @@ static LLVMValueRef fetch_constant( * addresses generates horrible VALU code with very high * VGPR usage and very low SIMD occupancy. */ - ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->i64, ""); - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, ""); + ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); + + LLVMValueRef desc0, desc1; + if (HAVE_32BIT_POINTERS) { + desc0 = ptr; + desc1 = LLVMConstInt(ctx->i32, + S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); + } else { + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, ""); + desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, ""); + desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, ""); + /* Mask out all bits except BASE_ADDRESS_HI. 
*/ + desc1 = LLVMBuildAnd(ctx->ac.builder, desc1, +LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), ""); + } LLVMValueRef desc_elems[] = { - LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, ""), - LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, ""), + desc0, + desc1, LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * 16, 0), LLVMConstInt(ctx->i32, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: align command buffer starting address to fix some Raven hangs
Module: Mesa Branch: master Commit: 75c5d25f0f34cd70246ee1b0b77a75ec82dfcecb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=75c5d25f0f34cd70246ee1b0b77a75ec82dfcecb Author: Marek OlšákDate: Tue Mar 6 19:07:58 2018 -0500 radeonsi: align command buffer starting address to fix some Raven hangs Cc: 17.3 18.0 Reviewed-by: Christian König Reviewed-by: Alex Deucher --- src/amd/common/ac_gpu_info.c | 21 - src/amd/common/ac_gpu_info.h | 1 + src/gallium/drivers/radeonsi/si_pm4.c | 5 +++-- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 5 +++-- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 1 + 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 146098baa0..7c13e5f70b 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -98,7 +98,9 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, { struct amdgpu_buffer_size_alignments alignment_info = {}; struct amdgpu_heap_info vram, vram_vis, gtt; - struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_enc = {}; + struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}; + struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {}; + struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {}; uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0; int r, i, j; drmDevicePtr devinfo; @@ -154,6 +156,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, return false; } + r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, ); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(gfx) failed.\n"); + return false; + } + r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_COMPUTE, 0, ); if (r) { fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n"); @@ -340,6 +348,17 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, if (info->chip_class == SI) info->gfx_ib_pad_with_type2 = TRUE; + unsigned ib_align = 0; + ib_align 
= MAX2(ib_align, gfx.ib_start_alignment); + ib_align = MAX2(ib_align, compute.ib_start_alignment); + ib_align = MAX2(ib_align, dma.ib_start_alignment); + ib_align = MAX2(ib_align, uvd.ib_start_alignment); + ib_align = MAX2(ib_align, uvd_enc.ib_start_alignment); + ib_align = MAX2(ib_align, vce.ib_start_alignment); + ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment); + ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment); + info->ib_start_alignment = ib_align; + return true; } diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 7c86dc1cb6..0beba9604a 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -62,6 +62,7 @@ struct radeon_info { boolhas_virtual_memory; boolgfx_ib_pad_with_type2; boolhas_hw_decode; + unsignedib_start_alignment; uint32_tnum_sdma_rings; uint32_tnum_compute_rings; uint32_tuvd_fw_version; diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 96e4e1dd1a..f4c41f5ffa 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -167,8 +167,9 @@ void si_pm4_upload_indirect_buffer(struct si_context *sctx, r600_resource_reference(>indirect_buffer, NULL); state->indirect_buffer = (struct r600_resource*) - pipe_buffer_create(screen, 0, - PIPE_USAGE_DEFAULT, aligned_ndw * 4); + si_aligned_buffer_create(screen, 0, +PIPE_USAGE_DEFAULT, aligned_ndw * 4, +sctx->screen->info.ib_start_alignment); if (!state->indirect_buffer) return; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 92d5394b12..d9a95c0509 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -800,10 +800,11 @@ static void amdgpu_set_ib_size(struct amdgpu_ib *ib) } } -static void amdgpu_ib_finalize(struct amdgpu_ib *ib) +static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct amdgpu_ib *ib) { amdgpu_set_ib_size(ib); ib->used_ib_space += 
ib->base.current.cdw * 4; + ib->used_ib_space = align(ib->used_ib_space, ws->info.ib_start_alignment);
Mesa (master): winsys/amdgpu: query GDS info
Module: Mesa Branch: master Commit: 78ef16e2f921355280c6ec92fc28753e6dd4d541 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=78ef16e2f921355280c6ec92fc28753e6dd4d541 Author: Marek OlšákDate: Sun Sep 11 21:53:20 2016 +0200 winsys/amdgpu: query GDS info Reviewed-by: Alex Deucher --- src/amd/common/ac_gpu_info.c | 11 +++ src/amd/common/ac_gpu_info.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 7c13e5f70b..29e2aa8a52 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -101,6 +101,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}; struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {}; struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {}; + struct amdgpu_gds_resource_info gds = {}; uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0; int r, i, j; drmDevicePtr devinfo; @@ -248,6 +249,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, return false; } + r = amdgpu_query_gds_info(dev, ); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_query_gds_info failed.\n"); + return false; + } + /* Set chip identification. */ info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */ info->vce_harvest_config = amdinfo->vce_harvest_config; @@ -283,6 +290,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, info->gart_size = gtt.heap_size; info->vram_size = vram.heap_size; info->vram_vis_size = vram_vis.heap_size; + info->gds_size = gds.gds_total_size; + info->gds_gfx_partition_size = gds.gds_gfx_partition_size; /* The kernel can split large buffers in VRAM but not in GTT, so large * allocations can fail or cause buffer movement failures in the kernel. 
*/ @@ -403,6 +412,8 @@ void ac_print_gpu_info(struct radeon_info *info) printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size, 1024*1024)); printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024*1024)); printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024*1024)); + printf("gds_size = %u kB\n", info->gds_size / 1024); + printf("gds_gfx_partition_size = %u kB\n", info->gds_gfx_partition_size / 1024); printf("max_alloc_size = %i MB\n", (int)DIV_ROUND_UP(info->max_alloc_size, 1024*1024)); printf("min_alloc_size = %u\n", info->min_alloc_size); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 0beba9604a..34d91bec14 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -55,6 +55,8 @@ struct radeon_info { uint64_tgart_size; uint64_tvram_size; uint64_tvram_vis_size; + unsignedgds_size; + unsignedgds_gfx_partition_size; uint64_tmax_alloc_size; uint32_tmin_alloc_size; uint32_taddress32_hi; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): winsys/amdgpu: pad compute IBs
Module: Mesa Branch: master Commit: a4a113b5bc8e3248ebcfeac6f9c9ff24e85caadd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a4a113b5bc8e3248ebcfeac6f9c9ff24e85caadd Author: Marek OlšákDate: Tue Mar 6 15:03:09 2018 -0500 winsys/amdgpu: pad compute IBs v2: pad with PKT2 NOPs on SI Reviewed-by: Alex Deucher --- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index d9a95c0509..a3feeb9302 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1528,6 +1528,7 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs, } break; case RING_GFX: + case RING_COMPUTE: /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */ if (ws->info.gfx_ib_pad_with_type2) { while (rcs->current.cdw & 7) @@ -1536,7 +1537,8 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs, while (rcs->current.cdw & 7) radeon_emit(rcs, 0x1000); /* type3 nop packet */ } - ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4; + if (cs->ring_type == RING_GFX) + ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4; break; case RING_UVD: case RING_UVD_ENC: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: remove chip_class parameter from si_lower_nir
Module: Mesa Branch: master Commit: 9b7db1281521b95acb04db624bf93193fd3d4f56 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b7db1281521b95acb04db624bf93193fd3d4f56 Author: Marek OlšákDate: Tue Mar 6 18:30:06 2018 -0500 radeonsi: remove chip_class parameter from si_lower_nir We can get it from si_screen. Reviewed-by: Timothy Arceri Acked-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_compute.c | 3 +-- src/gallium/drivers/radeonsi/si_shader.h| 4 +--- src/gallium/drivers/radeonsi/si_shader_nir.c| 6 +++--- src/gallium/drivers/radeonsi/si_state_shaders.c | 3 +-- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 92d4514071..46873ccce7 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -107,7 +107,7 @@ static void si_create_compute_state_async(void *job, int thread_index) sel.nir = program->ir.nir; si_nir_scan_shader(sel.nir, ); - si_lower_nir(, program->compiler_ctx_state.chip_class); + si_lower_nir(); } @@ -186,7 +186,6 @@ static void *si_create_compute_state( program->ir.nir = (struct nir_shader *) cso->prog; } - program->compiler_ctx_state.chip_class = sctx->b.chip_class; program->compiler_ctx_state.debug = sctx->debug; program->compiler_ctx_state.is_debug_context = sctx->is_debug; p_atomic_inc(>num_shaders_created); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 23f9d20e19..f58978989d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -307,8 +307,6 @@ struct si_shader; /* State of the context creating the shader object. */ struct si_compiler_ctx_state { - enum chip_class chip_class; - /* Should only be used by si_init_shader_selector_async and * si_build_shader_variant if thread_index == -1 (non-threaded). 
*/ LLVMTargetMachineReftm; @@ -674,7 +672,7 @@ void si_nir_scan_shader(const struct nir_shader *nir, void si_nir_scan_tess_ctrl(const struct nir_shader *nir, const struct tgsi_shader_info *info, struct tgsi_tessctrl_info *out); -void si_lower_nir(struct si_shader_selector *sel, enum chip_class chip_class); +void si_lower_nir(struct si_shader_selector *sel); /* Inline helpers. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 54ab0afca6..e5377358dc 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -21,8 +21,8 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "si_shader.h" #include "si_shader_internal.h" +#include "si_pipe.h" #include "ac_nir_to_llvm.h" @@ -623,7 +623,7 @@ void si_nir_scan_shader(const struct nir_shader *nir, * selector is created. */ void -si_lower_nir(struct si_shader_selector* sel, enum chip_class chip_class) +si_lower_nir(struct si_shader_selector* sel) { /* Adjust the driver location of inputs and outputs. 
The state tracker * interprets them as slots, while the ac/nir backend interprets them @@ -673,7 +673,7 @@ si_lower_nir(struct si_shader_selector* sel, enum chip_class chip_class) }; NIR_PASS_V(sel->nir, nir_lower_subgroups, _options); - ac_lower_indirect_derefs(sel->nir, chip_class); + ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class); bool progress; do { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 8e02a04898..8fe4c04ae7 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1999,7 +1999,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, si_nir_scan_shader(sel->nir, >info); si_nir_scan_tess_ctrl(sel->nir, >info, >tcs_info); - si_lower_nir(sel, sctx->b.chip_class); + si_lower_nir(sel); } sel->type = sel->info.processor; @@ -3121,7 +3121,6 @@ bool si_update_shaders(struct si_context *sctx) old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0; int r; - compiler_state.chip_class = sctx->b.chip_class; compiler_state.tm = sctx->tm; compiler_state.debug = sctx->debug; compiler_state.is_debug_context = sctx->is_debug; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org
Mesa (master): etnaviv: add query_group_info for sw counters
Module: Mesa Branch: master Commit: 3d912bd742edc2f66758b25b36371e581fd62d45 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d912bd742edc2f66758b25b36371e581fd62d45 Author: Christian GmeinerDate: Mon Mar 5 23:26:42 2018 +0100 etnaviv: add query_group_info for sw counters Signed-off-by: Christian Gmeiner Reviewed-by: Lucas Stach --- src/gallium/drivers/etnaviv/etnaviv_query_sw.c | 30 -- src/gallium/drivers/etnaviv/etnaviv_query_sw.h | 5 + 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/etnaviv/etnaviv_query_sw.c b/src/gallium/drivers/etnaviv/etnaviv_query_sw.c index 2e65065b28..f955d8e210 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_query_sw.c +++ b/src/gallium/drivers/etnaviv/etnaviv_query_sw.c @@ -118,16 +118,16 @@ etna_sw_create_query(struct etna_context *ctx, unsigned query_type) return q; } +static const struct pipe_driver_query_info list[] = { + {"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, { 0 }}, + {"draw-calls", ETNA_QUERY_DRAW_CALLS, { 0 }}, + {"rs-operations", ETNA_QUERY_RS_OPERATIONS, { 0 }}, +}; + int etna_sw_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, struct pipe_driver_query_info *info) { - static const struct pipe_driver_query_info list[] = { - {"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, { 0 }}, - {"draw-calls", ETNA_QUERY_DRAW_CALLS, { 0 }}, - {"rs-operations", ETNA_QUERY_RS_OPERATIONS, { 0 }}, - }; - if (!info) return ARRAY_SIZE(list); @@ -138,3 +138,21 @@ etna_sw_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, return 1; } + +int +etna_sw_get_driver_query_group_info(struct pipe_screen *pscreen, +unsigned index, +struct pipe_driver_query_group_info *info) +{ + if (!info) + return ARRAY_SIZE(list); + + if (index != 0) + return 0; + + info->name = "driver"; + info->max_active_queries = ARRAY_SIZE(list); + info->num_queries = ARRAY_SIZE(list); + + return 1; +} diff --git a/src/gallium/drivers/etnaviv/etnaviv_query_sw.h 
b/src/gallium/drivers/etnaviv/etnaviv_query_sw.h index 9321147094..f5a2dbef1a 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_query_sw.h +++ b/src/gallium/drivers/etnaviv/etnaviv_query_sw.h @@ -51,4 +51,9 @@ int etna_sw_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, struct pipe_driver_query_info *info); +int +etna_sw_get_driver_query_group_info(struct pipe_screen *pscreen, +unsigned index, +struct pipe_driver_query_group_info *info); + #endif ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): etnaviv: add get_driver_query_group_info(..)
Module: Mesa Branch: master Commit: 5b68a7297d2a610faeb7353c8e49910ea1b16d43 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b68a7297d2a610faeb7353c8e49910ea1b16d43 Author: Christian GmeinerDate: Mon Mar 5 23:26:43 2018 +0100 etnaviv: add get_driver_query_group_info(..) This enables AMD_performance_monitor extension. Signed-off-by: Christian Gmeiner Reviewed-by: Lucas Stach --- src/gallium/drivers/etnaviv/etnaviv_query.c | 13 + 1 file changed, 13 insertions(+) diff --git a/src/gallium/drivers/etnaviv/etnaviv_query.c b/src/gallium/drivers/etnaviv/etnaviv_query.c index 9e897cd75a..2d257a9d34 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_query.c +++ b/src/gallium/drivers/etnaviv/etnaviv_query.c @@ -110,6 +110,18 @@ etna_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, return etna_sw_get_driver_query_info(pscreen, index, info); } +static int +etna_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_group_info *info) +{ + int nr_sw_groups = etna_sw_get_driver_query_group_info(pscreen, 0, NULL); + + if (!info) + return nr_sw_groups; + + return etna_sw_get_driver_query_group_info(pscreen, index, info); +} + static void etna_set_active_query_state(struct pipe_context *pipe, boolean enable) { @@ -119,6 +131,7 @@ void etna_query_screen_init(struct pipe_screen *pscreen) { pscreen->get_driver_query_info = etna_get_driver_query_info; + pscreen->get_driver_query_group_info = etna_get_driver_query_group_info; } void ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): meson: Fix building gallium media libs without egl
Module: Mesa Branch: master Commit: 1e9d779331544f8f039978c36430bef1c4efdb2d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1e9d779331544f8f039978c36430bef1c4efdb2d Author: Dylan Baker Date: Wed Feb 28 13:07:57 2018 -0800 meson: Fix building gallium media libs without egl v2: - rebase on omx fix Signed-off-by: Dylan Baker Reviewed-by: Eric Anholt (v1) --- meson.build | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 9950ac93b3..3dc1271f2a 100644 --- a/meson.build +++ b/meson.build @@ -1241,7 +1241,9 @@ if with_platform_x11 endif dep_glproto = dependency('glproto', version : '>= 1.4.14') endif - if with_egl + if (with_egl or ( + with_gallium_vdpau or with_gallium_xvmc or with_gallium_xa or + with_gallium_omx != 'disabled')) dep_xcb_xfixes = dependency('xcb-xfixes') endif endif ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): meson: Allow building dri based EGL without GLX
Module: Mesa Branch: master Commit: f74cf04d3e81728591999b8ca952313178c54824 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f74cf04d3e81728591999b8ca952313178c54824 Author: Dylan BakerDate: Wed Feb 28 10:13:38 2018 -0800 meson: Allow building dri based EGL without GLX It should be possible to build EGL without GLX, but the meson build currently doesn't allow that because it too tightly couples glx and dri. This patch eases dri and glx apart, so that EGL without GLX can be built. CC: Daniel Stone Reviewed-by: Eric Anholt Signed-off-by: Dylan Baker --- meson.build | 25 +++-- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/meson.build b/meson.build index 8a17d7f240..9950ac93b3 100644 --- a/meson.build +++ b/meson.build @@ -1,4 +1,4 @@ -# Copyright © 2017 Intel Corporation +# Copyright © 2017-2018 Intel Corporation # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,6 +29,8 @@ project( default_options : ['buildtype=debugoptimized', 'c_std=c99', 'cpp_std=c++11'] ) +system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'dragonfly', 'linux'].contains(host_machine.system()) + # Arguments for the preprocessor, put these in a separate array from the C and # C++ (cpp in meson terminology) arguments since they need to be added to the # default arguments for both C and C++. 
@@ -172,6 +174,13 @@ if _drivers != '' with_gallium_virgl = _split.contains('virgl') with_gallium_swr = _split.contains('swr') with_gallium = true + if system_has_kms_drm +_glx = get_option('glx') +_egl = get_option('egl') +if _glx == 'dri' or _egl == 'true' or (_glx == 'disabled' and _egl != 'false') + with_dri = true +endif + endif endif with_intel_vk = false @@ -217,8 +226,6 @@ if with_dri_i915 or with_gallium_i915 dep_libdrm_intel = dependency('libdrm_intel', version : '>= 2.4.75') endif -system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'dragonfly', 'linux'].contains(host_machine.system()) - if host_machine.system() == 'darwin' with_dri_platform = 'apple' elif ['windows', 'cygwin'].contains(host_machine.system()) @@ -272,6 +279,7 @@ if with_glx == 'auto' elif with_gallium # Even when building just gallium drivers the user probably wants dri with_glx = 'dri' +with_dri = true elif with_platform_x11 and with_any_opengl and not with_any_vk # The automatic behavior should not be to turn on xlib based glx when # building only vulkan drivers @@ -280,11 +288,6 @@ if with_glx == 'auto' with_glx = 'disabled' endif endif -if with_glx == 'dri' - if with_gallium - with_dri = true - endif -endif if not (with_dri or with_gallium or with_glx == 'xlib' or with_glx == 'gallium-xlib') with_gles1 = false @@ -314,6 +317,8 @@ elif _egl == 'true' error('EGL requires shared-glapi') elif egl_native_platform == '' error('No platforms specified, consider -Dplatforms=drm,x11 at least') + elif not ['disabled', 'dri'].contains(with_glx) +error('EGL requires dri, but a GLX is being built without dri') endif with_egl = true else @@ -633,7 +638,7 @@ endif gl_pkgconfig_c_flags = [] if with_platform_x11 - if with_any_vk or (with_glx == 'dri' and with_dri_platform == 'drm') + if with_any_vk or with_egl or (with_glx == 'dri' and with_dri_platform == 'drm') pre_args += '-DHAVE_X11_PLATFORM' endif if with_glx == 'xlib' or with_glx == 'gallium-xlib' @@ -1219,7 +1224,7 @@ if with_platform_x11 
dep_xcb = dependency('xcb') dep_x11_xcb = dependency('x11-xcb') endif - if with_any_vk or (with_glx == 'dri' and with_dri_platform == 'drm') + if with_any_vk or with_egl or (with_glx == 'dri' and with_dri_platform == 'drm') dep_xcb_dri2 = dependency('xcb-dri2', version : '>= 1.8') if with_dri3 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glx/apple: Ship meson build file in tarball
Module: Mesa Branch: master Commit: d41ee9ba5d0f085b517bc6ce192f912fc1490e3a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d41ee9ba5d0f085b517bc6ce192f912fc1490e3a Author: Thierry Reding Date: Tue Mar 6 10:44:08 2018 +0100 glx/apple: Ship meson build file in tarball The meson build file for Apple GLX is not listed in the EXTRA_DIST make variable and therefore isn't shipped as part of the release tarball, so meson builds from the tarball will fail. Add the file to EXTRA_DIST to ensure it is included in the tarball. Reviewed-by: Dylan Baker Signed-off-by: Thierry Reding --- src/glx/apple/Makefile.am | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/glx/apple/Makefile.am b/src/glx/apple/Makefile.am index bfa18b1c2f..8f93268635 100644 --- a/src/glx/apple/Makefile.am +++ b/src/glx/apple/Makefile.am @@ -1,4 +1,6 @@ -EXTRA_DIST = RELEASE_NOTES +EXTRA_DIST = \ + RELEASE_NOTES \ + meson.build noinst_LTLIBRARIES = libappleglx.la ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): ac/nir: do not emit unnecessary null exports in fragment shaders
Module: Mesa Branch: master Commit: 4e3c1ace659c7325bd8d54b66370c3755e40a266 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4e3c1ace659c7325bd8d54b66370c3755e40a266 Author: Samuel Pitoiset Date: Thu Mar 8 09:53:14 2018 +0100 ac/nir: do not emit unnecessary null exports in fragment shaders Null exports should only be needed when no other exports are emitted. This removes a bunch of 'exp null off, off, off, off done vm'. Affected games are Dota 2 and Wolfenstein 2, not sure if that really helps, but code size is decreasing there. Polaris10: Totals from affected shaders: SGPRS: 8216 -> 8216 (0.00 %) VGPRS: 7072 -> 7072 (0.00 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Code Size: 454968 -> 453896 (-0.24 %) bytes Max Waves: 772 -> 772 (0.00 %) Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/common/ac_nir_to_llvm.c | 29 - 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index c785244dcc..9b85069860 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -6539,17 +6539,13 @@ handle_tcs_outputs_post(struct radv_shader_context *ctx) static bool si_export_mrt_color(struct radv_shader_context *ctx, - LLVMValueRef *color, unsigned index, bool is_last, + LLVMValueRef *color, unsigned index, struct ac_export_args *args) { /* Export */ si_llvm_init_export_args(ctx, color, 0xf, V_008DFC_SQ_EXP_MRT + index, args); - - if (is_last) { - args->valid_mask = 1; /* whether the EXEC mask is valid */ - args->done = 1; /* DONE bit */ - } else if (!args->enabled_channels) + if (!args->enabled_channels) return false; /* unnecessary NULL export */ return true; @@ -6576,7 +6572,6 @@ handle_fs_outputs_post(struct radv_shader_context *ctx) for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { LLVMValueRef values[4]; - bool last = false; if (!(ctx->output_mask & (1ull << i))) continue; @@ -6588,14 +6583,9 @@ handle_fs_outputs_post(struct 
radv_shader_context *ctx) values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); - if (!ctx->shader_info->info.ps.writes_z && - !ctx->shader_info->info.ps.writes_stencil && - !ctx->shader_info->info.ps.writes_sample_mask) - last = ctx->output_mask <= ((1ull << (i + 1)) - 1); - bool ret = si_export_mrt_color(ctx, values, i - FRAG_RESULT_DATA0, - last, &color_args[index]); + &color_args[index]); if (ret) index++; } @@ -6614,6 +6604,19 @@ handle_fs_outputs_post(struct radv_shader_context *ctx) radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0)); } + /* Set the DONE bit on last non-null color export only if Z isn't +* exported. +*/ + if (index > 0 && + !ctx->shader_info->info.ps.writes_z && + !ctx->shader_info->info.ps.writes_stencil && + !ctx->shader_info->info.ps.writes_sample_mask) { + unsigned last = index - 1; + + color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */ + color_args[last].done = 1; /* DONE bit */ + } + /* Export PS outputs. */ for (unsigned i = 0; i < index; i++) ac_build_export(&ctx->ac, &color_args[i]); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): drirc: whitespace fix
Module: Mesa Branch: master Commit: 19dd7f007e62d3962a5a9f1d92332e8e23e0deee URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=19dd7f007e62d3962a5a9f1d92332e8e23e0deee Author: Eric Engestrom Date: Thu Mar 8 09:52:16 2018 + drirc: whitespace fix Signed-off-by: Eric Engestrom --- src/util/drirc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/drirc b/src/util/drirc index c964588e72..250cc784b3 100644 --- a/src/util/drirc +++ b/src/util/drirc @@ -277,9 +277,9 @@ TODO: document the other workarounds. - + - + ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit