Module: Mesa Branch: main Commit: 0af08acca5f647ac939bd68181f25d8afe5aa1ac URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0af08acca5f647ac939bd68181f25d8afe5aa1ac
Author: Alyssa Rosenzweig <[email protected]> Date: Thu Nov 24 20:40:42 2022 -0500 nir: Add intrinsics for lowering UBOs/VBOs on AGX We'll use formatted loads and some system values to lower UBOs and VBOs to global memory in NIR, using the AGX-specific format support and addressing arithmetic to optimize the emitted code. Add the intrinsics and teach nir_opt_preamble how to move them so we don't regress UBO pushing. Signed-off-by: Alyssa Rosenzweig <[email protected]> Acked-by: Karol Herbst <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19996> --- src/compiler/nir/nir_intrinsics.py | 25 +++++++++++++++++++++++++ src/compiler/nir/nir_opt_preamble.c | 3 +++ 2 files changed, 28 insertions(+) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index d9db0afc3b4..dad90ab4e78 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -278,6 +278,9 @@ index("bool", "synchronous") # Value ID to identify SSA value loaded/stored on the stack index("unsigned", "value_id") +# Whether to sign-extend offsets in address arithmetic (else zero extend) +index("bool", "sign_extend") + intrinsic("nop", flags=[CAN_ELIMINATE]) intrinsic("convert_alu_types", dest_comp=0, src_comp=[0], @@ -1591,6 +1594,22 @@ store("local_pixel_agx", [1], [BASE, WRITE_MASK, FORMAT], [CAN_REORDER]) intrinsic("block_image_store_agx", [1, 1], bit_sizes=[32, 16], indices=[FORMAT, IMAGE_DIM], flags=[CAN_REORDER]) +# Formatted loads. The format is the pipe_format in memory (see +# agx_internal_formats.h for the supported list). This accesses: +# +# address + extend(index) << (format shift + shift) +# +# The nir_intrinsic_base() index encodes the shift. The sign_extend index +# determines whether sign- or zero-extension is used for the index. +# +# All loads on AGX use these hardware instructions, so while these are +# logically load_global_agx (etc), the _global is omitted as it adds nothing. 
+# +# src[] = { address, index }. +load("agx", [1, 1], [ACCESS, BASE, FORMAT, SIGN_EXTEND], [CAN_ELIMINATE]) +load("constant_agx", [1, 1], [ACCESS, BASE, FORMAT, SIGN_EXTEND], + [CAN_ELIMINATE, CAN_REORDER]) + # Logical complement of load_front_face, mapping to an AGX system value system_value("back_face_agx", 1, bit_sizes=[1, 32]) @@ -1598,6 +1617,12 @@ system_value("back_face_agx", 1, bit_sizes=[1, 32]) # the referenced array has stride 24. system_value("texture_base_agx", 1, bit_sizes=[64]) +# Load the base address of an indexed UBO/VBO (for lowering UBOs/VBOs) +intrinsic("load_ubo_base_agx", src_comp=[1], dest_comp=1, bit_sizes=[64], + flags=[CAN_ELIMINATE, CAN_REORDER]) +intrinsic("load_vbo_base_agx", src_comp=[1], dest_comp=1, bit_sizes=[64], + flags=[CAN_ELIMINATE, CAN_REORDER]) + # Intel-specific query for loading from the brw_image_param struct passed # into the shader as a uniform. The variable is a deref to the image # variable. The const index specifies which of the six parameters to load. diff --git a/src/compiler/nir/nir_opt_preamble.c b/src/compiler/nir/nir_opt_preamble.c index b46426779ff..ee3ffacf41f 100644 --- a/src/compiler/nir/nir_opt_preamble.c +++ b/src/compiler/nir/nir_opt_preamble.c @@ -153,6 +153,8 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx) case nir_intrinsic_load_cull_any_enabled_amd: case nir_intrinsic_load_cull_small_prim_precision_amd: case nir_intrinsic_load_texture_base_agx: + case nir_intrinsic_load_ubo_base_agx: + case nir_intrinsic_load_vbo_base_agx: return true; /* Intrinsics which can be moved depending on hardware */ @@ -195,6 +197,7 @@ can_move_intrinsic(nir_intrinsic_instr *instr, opt_preamble_ctx *ctx) case nir_intrinsic_masked_swizzle_amd: case nir_intrinsic_load_ssbo_address: case nir_intrinsic_bindless_resource_ir3: + case nir_intrinsic_load_constant_agx: return can_move_srcs(&instr->instr, ctx); /* Image/SSBO loads can be moved if they are CAN_REORDER and their
