Module: Mesa Branch: main Commit: 275afe73f749790918a7acf1d20ec430fbb90a52 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=275afe73f749790918a7acf1d20ec430fbb90a52
Author: Mary Guillemard <mary.guillem...@collabora.com> Date: Thu Oct 12 17:45:53 2023 +0200 nvk: Implement VK_KHR_fragment_shader_barycentric Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9619 Signed-off-by: Mary Guillemard <mary.guillem...@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26224> --- docs/features.txt | 2 +- src/nouveau/compiler/nak_encode_sm70.rs | 15 ++++ src/nouveau/compiler/nak_from_nir.rs | 25 +++++++ src/nouveau/compiler/nak_ir.rs | 23 ++++++ src/nouveau/compiler/nak_nir.c | 120 ++++++++++++++++++++++++++----- src/nouveau/compiler/nak_private.h | 10 +++ src/nouveau/vulkan/nvk_physical_device.c | 9 +++ src/nouveau/vulkan/nvk_shader.c | 1 + 8 files changed, 185 insertions(+), 20 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 7077285d981..379192a08db 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -512,7 +512,7 @@ Khronos extensions that are not part of any Vulkan version: VK_KHR_external_memory_win32 DONE (dzn) VK_KHR_external_semaphore_fd DONE (anv, dzn, hasvk, nvk, pvr, radv, tu, v3dv, vn) VK_KHR_external_semaphore_win32 DONE (dzn) - VK_KHR_fragment_shader_barycentric DONE (radv/gfx10.3+) + VK_KHR_fragment_shader_barycentric DONE (nvk/Turing+, radv/gfx10.3+) VK_KHR_fragment_shading_rate DONE (anv/gen11+, radv/gfx10.3+) VK_KHR_get_display_properties2 DONE (anv, pvr, radv, tu, v3dv) VK_KHR_get_surface_capabilities2 DONE (anv, lvp, nvk, pvr, radv, tu, v3dv, vn) diff --git a/src/nouveau/compiler/nak_encode_sm70.rs b/src/nouveau/compiler/nak_encode_sm70.rs index 1e5d9ecfedc..f11ade5622a 100644 --- a/src/nouveau/compiler/nak_encode_sm70.rs +++ b/src/nouveau/compiler/nak_encode_sm70.rs @@ -1549,6 +1549,20 @@ impl SM70Instr { self.set_pred_dst(81..84, Dst::None); } + fn encode_ldtram(&mut self, op: &OpLdTram) { + self.set_opcode(0x3ad); + self.set_dst(op.dst); + self.set_ureg(24..32, RegRef::zero(RegFile::UGPR, 1)); + + assert!(op.addr % 4 == 0); + self.set_field(64..72, op.addr >> 2); + + self.set_bit(72, op.use_c); + + // Unknown but required + self.set_bit(91, true); + } + fn encode_cctl(&mut self, op: &OpCCtl) { assert!(op.mem_space == MemSpace::Global); self.set_opcode(0x98f); @@ -1866,6 +1880,7 @@ impl SM70Instr { Op::ALd(op) => si.encode_ald(&op), Op::ASt(op) => si.encode_ast(&op), Op::Ipa(op) => si.encode_ipa(&op), + Op::LdTram(op) => si.encode_ldtram(&op), Op::CCtl(op) => si.encode_cctl(&op), Op::MemBar(op) => si.encode_membar(&op), Op::BMov(op) => si.encode_bmov(&op), diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs index 18df8cd5102..40550924b37 100644 --- a/src/nouveau/compiler/nak_from_nir.rs +++ b/src/nouveau/compiler/nak_from_nir.rs @@ -1652,6 +1652,31 @@ impl<'a> ShaderFromNir<'a> { }); self.set_dst(&intrin.def, dst); } + nir_intrinsic_ldtram_nv => { + let ShaderIoInfo::Fragment(io) = &mut self.info.io else { + panic!("ldtram_nv is only used for fragment shaders"); + }; + + assert!( + intrin.def.bit_size() == 32 + && intrin.def.num_components == 2 + ); + + let flags = intrin.flags(); + let use_c = flags != 0; + + let addr = u16::try_from(intrin.base()).unwrap(); + + io.mark_barycentric_attr_in(addr); + + let dst = b.alloc_ssa(RegFile::GPR, 2); + b.push_op(OpLdTram { + dst: dst.into(), + addr, + use_c, + }); + self.set_dst(&intrin.def, dst); + } nir_intrinsic_load_sample_id => { let dst = b.alloc_ssa(RegFile::GPR, 1); b.push_op(OpPixLd { diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index 6af4eb1a05f..7e78b839cc3 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -3432,6 +3432,27 @@ impl fmt::Display for OpIpa { } } +#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpLdTram { + pub dst: Dst, + pub addr: u16, + pub use_c: bool, +} + +impl fmt::Display for OpLdTram { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "LDTRAM")?; + if self.use_c { + write!(f, ".C")?; + } else { + write!(f, ".AB")?; + } + write!(f, " {} a[{:#x}]", self.dst, self.addr)?; + Ok(()) + } +} + #[allow(dead_code)] pub enum CCtlOp { PF1, @@ -4221,6 +4242,7 @@ pub enum Op { ALd(OpALd), ASt(OpASt), Ipa(OpIpa), + LdTram(OpLdTram), CCtl(OpCCtl), MemBar(OpMemBar), BMov(OpBMov), @@ -4655,6 +4677,7 @@ impl Instr { | Op::ASt(_) | Op::Ipa(_) | Op::CCtl(_) + | Op::LdTram(_) | Op::MemBar(_) => false, // Control-flow ops diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index ef748adfdb1..585f4bef44e 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -632,6 +632,32 @@ load_sample_pos_at(nir_builder *b, nir_def *sample_id, return result; } +static nir_def * +load_barycentric_offset(nir_builder *b, nir_intrinsic_instr *bary, + const struct nak_fs_key *fs_key) +{ + nir_def *offset_f; + + if (bary->intrinsic == nir_intrinsic_load_barycentric_coord_at_sample || + bary->intrinsic == nir_intrinsic_load_barycentric_at_sample) { + nir_def *sample_id = bary->src[0].ssa; + nir_def *sample_pos = load_sample_pos_at(b, sample_id, fs_key); + offset_f = nir_fadd_imm(b, sample_pos, -0.5); + } else { + offset_f = bary->src[0].ssa; + } + + offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5), + nir_imm_float(b, 0.437500)); + nir_def *offset_fixed = + nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0)); + nir_def *offset = nir_ior(b, nir_ishl_imm(b, nir_channel(b, offset_fixed, 1), 16), + nir_iand_imm(b, nir_channel(b, offset_fixed, 0), + 0xffff)); + + return offset; +} + struct lower_fs_input_ctx { const struct nak_compiler *nak; const struct nak_fs_key *fs_key; @@ -714,6 +740,56 @@ lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data) return true; } + case nir_intrinsic_load_barycentric_coord_pixel: + case nir_intrinsic_load_barycentric_coord_centroid: + case nir_intrinsic_load_barycentric_coord_sample: + case nir_intrinsic_load_barycentric_coord_at_sample: + case nir_intrinsic_load_barycentric_coord_at_offset: { + b->cursor = nir_before_instr(&intrin->instr); + + uint32_t addr; + enum nak_interp_mode interp_mode; + if (nir_intrinsic_interp_mode(intrin) == INTERP_MODE_NOPERSPECTIVE) { + addr = NAK_ATTR_BARY_COORD_NO_PERSP; + interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR; + } else { + addr = NAK_ATTR_BARY_COORD; + interp_mode = NAK_INTERP_MODE_PERSPECTIVE; + } + + nir_def *offset = NULL; + enum nak_interp_loc interp_loc; + switch (intrin->intrinsic) { + case nir_intrinsic_load_barycentric_coord_at_sample: + case nir_intrinsic_load_barycentric_coord_at_offset: + interp_loc = NAK_INTERP_LOC_OFFSET; + offset = load_barycentric_offset(b, intrin, ctx->fs_key); + break; + case nir_intrinsic_load_barycentric_coord_centroid: + case nir_intrinsic_load_barycentric_coord_sample: + interp_loc = NAK_INTERP_LOC_CENTROID; + break; + case nir_intrinsic_load_barycentric_coord_pixel: + interp_loc = NAK_INTERP_LOC_DEFAULT; + break; + default: + unreachable("Unknown intrinsic"); + } + + nir_def *inv_w = NULL; + if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE) + inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset)); + + nir_def *res = load_interpolated_input(b, intrin->def.num_components, + addr, interp_mode, interp_loc, + inv_w, offset, ctx->nak); + + nir_def_rewrite_uses(&intrin->def, res); + nir_instr_remove(&intrin->instr); + + return true; + } + case nir_intrinsic_load_interpolated_input: { b->cursor = nir_before_instr(&intrin->instr); @@ -736,25 +812,7 @@ lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data) case nir_intrinsic_load_barycentric_at_offset: case nir_intrinsic_load_barycentric_at_sample: { interp_loc = NAK_INTERP_LOC_OFFSET; - - nir_def *offset_f; - - if (bary->intrinsic == nir_intrinsic_load_barycentric_at_sample) { - nir_def *sample_id = bary->src[0].ssa; - nir_def *sample_pos = load_sample_pos_at(b, sample_id, ctx->fs_key); - offset_f = nir_fadd_imm(b, sample_pos, -0.5); - } else { - offset_f = bary->src[0].ssa; - } - - offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5), - nir_imm_float(b, 0.437500)); - nir_def *offset_fixed = - nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0)); - offset = nir_ior(b, nir_ishl_imm(b, nir_channel(b, offset_fixed, 1), - 16), - nir_iand_imm(b, nir_channel(b, offset_fixed, 0), - 0xffff)); + offset = load_barycentric_offset(b, bary, ctx->fs_key); break; } @@ -813,6 +871,30 @@ lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data) return true; } + case nir_intrinsic_load_input_vertex: { + b->cursor = nir_before_instr(&intrin->instr); + + unsigned vertex_id = nir_src_as_uint(intrin->src[0]); + assert(vertex_id < 3); + + const uint16_t addr = nir_intrinsic_base(intrin) + + nir_src_as_uint(intrin->src[1]) + + nir_intrinsic_component(intrin) * 4; + + nir_def *comps[NIR_MAX_VEC_COMPONENTS]; + for (unsigned c = 0; c < intrin->def.num_components; c++) { + nir_def *data = nir_ldtram_nv(b, .base = addr + c * 4, + .flags = vertex_id == 2); + comps[c] = nir_channel(b, data, vertex_id & 1); + } + nir_def *res = nir_vec(b, comps, intrin->num_components); + + nir_def_rewrite_uses(&intrin->def, res); + nir_instr_remove(&intrin->instr); + + return true; + } + default: return false; } diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h index cd07cc42559..3805959d23a 100644 --- a/src/nouveau/compiler/nak_private.h +++ b/src/nouveau/compiler/nak_private.h @@ -71,6 +71,16 @@ enum PACKED nak_attr { NAK_ATTR_INSTANCE_ID = 0x2f8, NAK_ATTR_VERTEX_ID = 0x2fc, + NAK_ATTR_BARY_COORD_NO_PERSP_X = 0x3a8, + NAK_ATTR_BARY_COORD_NO_PERSP_Y = 0x3ac, + NAK_ATTR_BARY_COORD_NO_PERSP_Z = 0x3b0, + NAK_ATTR_BARY_COORD_NO_PERSP = NAK_ATTR_BARY_COORD_NO_PERSP_X, + + NAK_ATTR_BARY_COORD_X = 0x3b4, + NAK_ATTR_BARY_COORD_Y = 0x3b8, + NAK_ATTR_BARY_COORD_Z = 0x3bc, + NAK_ATTR_BARY_COORD = NAK_ATTR_BARY_COORD_X, + /* Not in SPH */ NAK_ATTR_FRONT_FACE = 0x3fc, }; diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index bad5a5aeeca..1ccf1039c60 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -64,6 +64,8 @@ nvk_get_device_extensions(const struct nv_device_info *info, .KHR_external_semaphore = true, .KHR_external_semaphore_fd = true, .KHR_format_feature_flags2 = true, + .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A && + (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0, .KHR_get_memory_requirements2 = true, .KHR_image_format_list = true, .KHR_imageless_framebuffer = true, @@ -254,6 +256,10 @@ nvk_get_device_features(const struct nv_device_info *info, .dynamicRendering = true, .maintenance4 = true, + /* VK_KHR_fragment_shader_barycentric */ + .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A && + (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0, + /* VK_KHR_pipeline_executable_properties */ .pipelineExecutableInfo = true, @@ -659,6 +665,9 @@ nvk_get_device_properties(const struct nvk_instance *instance, /* VK_EXT_vertex_attribute_divisor */ .maxVertexAttribDivisor = UINT32_MAX, + + /* VK_KHR_fragment_shader_barycentric */ + .triStripVertexOrderIndependentOfProvokingVertex = false, }; snprintf(properties->deviceName, sizeof(properties->deviceName), diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c index 8e09eee10e7..38d136b0bf6 100644 --- a/src/nouveau/vulkan/nvk_shader.c +++ b/src/nouveau/vulkan/nvk_shader.c @@ -132,6 +132,7 @@ nvk_physical_device_spirv_options(const struct nvk_physical_device *pdev, .descriptor_indexing = true, .device_group = true, .draw_parameters = true, + .fragment_barycentric = true, .geometry_streams = true, .image_read_without_format = true, .image_write_without_format = true,