Module: Mesa
Branch: main
Commit: 275afe73f749790918a7acf1d20ec430fbb90a52
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=275afe73f749790918a7acf1d20ec430fbb90a52

Author: Mary Guillemard <mary.guillem...@collabora.com>
Date:   Thu Oct 12 17:45:53 2023 +0200

nvk: Implement VK_KHR_fragment_shader_barycentric

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9619

Signed-off-by: Mary Guillemard <mary.guillem...@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26224>

---

 docs/features.txt                        |   2 +-
 src/nouveau/compiler/nak_encode_sm70.rs  |  15 ++++
 src/nouveau/compiler/nak_from_nir.rs     |  25 +++++++
 src/nouveau/compiler/nak_ir.rs           |  23 ++++++
 src/nouveau/compiler/nak_nir.c           | 120 ++++++++++++++++++++++++++-----
 src/nouveau/compiler/nak_private.h       |  10 +++
 src/nouveau/vulkan/nvk_physical_device.c |   9 +++
 src/nouveau/vulkan/nvk_shader.c          |   1 +
 8 files changed, 185 insertions(+), 20 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 7077285d981..379192a08db 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -512,7 +512,7 @@ Khronos extensions that are not part of any Vulkan version:
   VK_KHR_external_memory_win32                          DONE (dzn)
   VK_KHR_external_semaphore_fd                          DONE (anv, dzn, hasvk, nvk, pvr, radv, tu, v3dv, vn)
   VK_KHR_external_semaphore_win32                       DONE (dzn)
-  VK_KHR_fragment_shader_barycentric                    DONE (radv/gfx10.3+)
+  VK_KHR_fragment_shader_barycentric                    DONE (nvk/Turing+, radv/gfx10.3+)
   VK_KHR_fragment_shading_rate                          DONE (anv/gen11+, radv/gfx10.3+)
   VK_KHR_get_display_properties2                        DONE (anv, pvr, radv, tu, v3dv)
   VK_KHR_get_surface_capabilities2                      DONE (anv, lvp, nvk, pvr, radv, tu, v3dv, vn)
diff --git a/src/nouveau/compiler/nak_encode_sm70.rs b/src/nouveau/compiler/nak_encode_sm70.rs
index 1e5d9ecfedc..f11ade5622a 100644
--- a/src/nouveau/compiler/nak_encode_sm70.rs
+++ b/src/nouveau/compiler/nak_encode_sm70.rs
@@ -1549,6 +1549,20 @@ impl SM70Instr {
         self.set_pred_dst(81..84, Dst::None);
     }
 
+    fn encode_ldtram(&mut self, op: &OpLdTram) {
+        self.set_opcode(0x3ad);
+        self.set_dst(op.dst);
+        self.set_ureg(24..32, RegRef::zero(RegFile::UGPR, 1));
+
+        assert!(op.addr % 4 == 0);
+        self.set_field(64..72, op.addr >> 2);
+
+        self.set_bit(72, op.use_c);
+
+        // Unknown but required
+        self.set_bit(91, true);
+    }
+
     fn encode_cctl(&mut self, op: &OpCCtl) {
         assert!(op.mem_space == MemSpace::Global);
         self.set_opcode(0x98f);
@@ -1866,6 +1880,7 @@ impl SM70Instr {
             Op::ALd(op) => si.encode_ald(&op),
             Op::ASt(op) => si.encode_ast(&op),
             Op::Ipa(op) => si.encode_ipa(&op),
+            Op::LdTram(op) => si.encode_ldtram(&op),
             Op::CCtl(op) => si.encode_cctl(&op),
             Op::MemBar(op) => si.encode_membar(&op),
             Op::BMov(op) => si.encode_bmov(&op),
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 18df8cd5102..40550924b37 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -1652,6 +1652,31 @@ impl<'a> ShaderFromNir<'a> {
                 });
                 self.set_dst(&intrin.def, dst);
             }
+            nir_intrinsic_ldtram_nv => {
+                let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
+                    panic!("ldtram_nv is only used for fragment shaders");
+                };
+
+                assert!(
+                    intrin.def.bit_size() == 32
+                        && intrin.def.num_components == 2
+                );
+
+                let flags = intrin.flags();
+                let use_c = flags != 0;
+
+                let addr = u16::try_from(intrin.base()).unwrap();
+
+                io.mark_barycentric_attr_in(addr);
+
+                let dst = b.alloc_ssa(RegFile::GPR, 2);
+                b.push_op(OpLdTram {
+                    dst: dst.into(),
+                    addr,
+                    use_c,
+                });
+                self.set_dst(&intrin.def, dst);
+            }
             nir_intrinsic_load_sample_id => {
                 let dst = b.alloc_ssa(RegFile::GPR, 1);
                 b.push_op(OpPixLd {
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 6af4eb1a05f..7e78b839cc3 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -3432,6 +3432,27 @@ impl fmt::Display for OpIpa {
     }
 }
 
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpLdTram {
+    pub dst: Dst,
+    pub addr: u16,
+    pub use_c: bool,
+}
+
+impl fmt::Display for OpLdTram {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "LDTRAM")?;
+        if self.use_c {
+            write!(f, ".C")?;
+        } else {
+            write!(f, ".AB")?;
+        }
+        write!(f, " {} a[{:#x}]", self.dst, self.addr)?;
+        Ok(())
+    }
+}
+
 #[allow(dead_code)]
 pub enum CCtlOp {
     PF1,
@@ -4221,6 +4242,7 @@ pub enum Op {
     ALd(OpALd),
     ASt(OpASt),
     Ipa(OpIpa),
+    LdTram(OpLdTram),
     CCtl(OpCCtl),
     MemBar(OpMemBar),
     BMov(OpBMov),
@@ -4655,6 +4677,7 @@ impl Instr {
             | Op::ASt(_)
             | Op::Ipa(_)
             | Op::CCtl(_)
+            | Op::LdTram(_)
             | Op::MemBar(_) => false,
 
             // Control-flow ops
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index ef748adfdb1..585f4bef44e 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -632,6 +632,32 @@ load_sample_pos_at(nir_builder *b, nir_def *sample_id,
    return result;
 }
 
+static nir_def *
+load_barycentric_offset(nir_builder *b, nir_intrinsic_instr *bary,
+                        const struct nak_fs_key *fs_key)
+{
+   nir_def *offset_f;
+
+   if (bary->intrinsic == nir_intrinsic_load_barycentric_coord_at_sample ||
+       bary->intrinsic == nir_intrinsic_load_barycentric_at_sample) {
+      nir_def *sample_id = bary->src[0].ssa;
+      nir_def *sample_pos = load_sample_pos_at(b, sample_id, fs_key);
+      offset_f = nir_fadd_imm(b, sample_pos, -0.5);
+   } else {
+      offset_f = bary->src[0].ssa;
+   }
+
+   offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5),
+                         nir_imm_float(b, 0.437500));
+   nir_def *offset_fixed =
+      nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0));
+   nir_def *offset = nir_ior(b, nir_ishl_imm(b, nir_channel(b, offset_fixed, 1), 16),
+                             nir_iand_imm(b, nir_channel(b, offset_fixed, 0),
+                                          0xffff));
+
+   return offset;
+}
+
 struct lower_fs_input_ctx {
    const struct nak_compiler *nak;
    const struct nak_fs_key *fs_key;
@@ -714,6 +740,56 @@ lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
       return true;
    }
 
+   case nir_intrinsic_load_barycentric_coord_pixel:
+   case nir_intrinsic_load_barycentric_coord_centroid:
+   case nir_intrinsic_load_barycentric_coord_sample:
+   case nir_intrinsic_load_barycentric_coord_at_sample:
+   case nir_intrinsic_load_barycentric_coord_at_offset: {
+      b->cursor = nir_before_instr(&intrin->instr);
+
+      uint32_t addr;
+      enum nak_interp_mode interp_mode;
+      if (nir_intrinsic_interp_mode(intrin) == INTERP_MODE_NOPERSPECTIVE) {
+         addr = NAK_ATTR_BARY_COORD_NO_PERSP;
+         interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;
+      } else {
+         addr = NAK_ATTR_BARY_COORD;
+         interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
+      }
+
+      nir_def *offset = NULL;
+      enum nak_interp_loc interp_loc;
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_barycentric_coord_at_sample:
+      case nir_intrinsic_load_barycentric_coord_at_offset:
+         interp_loc = NAK_INTERP_LOC_OFFSET;
+         offset = load_barycentric_offset(b, intrin, ctx->fs_key);
+         break;
+      case nir_intrinsic_load_barycentric_coord_centroid:
+      case nir_intrinsic_load_barycentric_coord_sample:
+         interp_loc = NAK_INTERP_LOC_CENTROID;
+         break;
+      case nir_intrinsic_load_barycentric_coord_pixel:
+         interp_loc = NAK_INTERP_LOC_DEFAULT;
+         break;
+      default:
+         unreachable("Unknown intrinsic");
+      }
+
+      nir_def *inv_w = NULL;
+      if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
+         inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset));
+
+      nir_def *res = load_interpolated_input(b, intrin->def.num_components,
+                                             addr, interp_mode, interp_loc,
+                                             inv_w, offset, ctx->nak);
+
+      nir_def_rewrite_uses(&intrin->def, res);
+      nir_instr_remove(&intrin->instr);
+
+      return true;
+   }
+
    case nir_intrinsic_load_interpolated_input: {
       b->cursor = nir_before_instr(&intrin->instr);
 
@@ -736,25 +812,7 @@ lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
       case nir_intrinsic_load_barycentric_at_offset:
       case nir_intrinsic_load_barycentric_at_sample: {
          interp_loc = NAK_INTERP_LOC_OFFSET;
-
-         nir_def *offset_f;
-
-         if (bary->intrinsic == nir_intrinsic_load_barycentric_at_sample) {
-            nir_def *sample_id = bary->src[0].ssa;
-            nir_def *sample_pos = load_sample_pos_at(b, sample_id, ctx->fs_key);
-            offset_f = nir_fadd_imm(b, sample_pos, -0.5);
-         } else {
-            offset_f = bary->src[0].ssa;
-         }
-
-         offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5),
-                               nir_imm_float(b, 0.437500));
-         nir_def *offset_fixed =
-            nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0));
-         offset = nir_ior(b, nir_ishl_imm(b, nir_channel(b, offset_fixed, 1),
-                                             16),
-                             nir_iand_imm(b, nir_channel(b, offset_fixed, 0),
-                                             0xffff));
+         offset = load_barycentric_offset(b, bary, ctx->fs_key);
          break;
       }
 
@@ -813,6 +871,30 @@ lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
       return true;
    }
 
+   case nir_intrinsic_load_input_vertex: {
+      b->cursor = nir_before_instr(&intrin->instr);
+
+      unsigned vertex_id = nir_src_as_uint(intrin->src[0]);
+      assert(vertex_id < 3);
+
+      const uint16_t addr = nir_intrinsic_base(intrin) +
+                            nir_src_as_uint(intrin->src[1]) +
+                            nir_intrinsic_component(intrin) * 4;
+
+      nir_def *comps[NIR_MAX_VEC_COMPONENTS];
+      for (unsigned c = 0; c < intrin->def.num_components; c++) {
+         nir_def *data = nir_ldtram_nv(b, .base = addr + c * 4,
+                                       .flags = vertex_id == 2);
+         comps[c] = nir_channel(b, data, vertex_id & 1);
+      }
+      nir_def *res = nir_vec(b, comps, intrin->num_components);
+
+      nir_def_rewrite_uses(&intrin->def, res);
+      nir_instr_remove(&intrin->instr);
+
+      return true;
+   }
+
    default:
       return false;
    }
diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h
index cd07cc42559..3805959d23a 100644
--- a/src/nouveau/compiler/nak_private.h
+++ b/src/nouveau/compiler/nak_private.h
@@ -71,6 +71,16 @@ enum PACKED nak_attr {
    NAK_ATTR_INSTANCE_ID       = 0x2f8,
    NAK_ATTR_VERTEX_ID         = 0x2fc,
 
+   NAK_ATTR_BARY_COORD_NO_PERSP_X = 0x3a8,
+   NAK_ATTR_BARY_COORD_NO_PERSP_Y = 0x3ac,
+   NAK_ATTR_BARY_COORD_NO_PERSP_Z = 0x3b0,
+   NAK_ATTR_BARY_COORD_NO_PERSP = NAK_ATTR_BARY_COORD_NO_PERSP_X,
+
+   NAK_ATTR_BARY_COORD_X = 0x3b4,
+   NAK_ATTR_BARY_COORD_Y = 0x3b8,
+   NAK_ATTR_BARY_COORD_Z = 0x3bc,
+   NAK_ATTR_BARY_COORD = NAK_ATTR_BARY_COORD_X,
+
    /* Not in SPH */
    NAK_ATTR_FRONT_FACE        = 0x3fc,
 };
diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c
index bad5a5aeeca..1ccf1039c60 100644
--- a/src/nouveau/vulkan/nvk_physical_device.c
+++ b/src/nouveau/vulkan/nvk_physical_device.c
@@ -64,6 +64,8 @@ nvk_get_device_extensions(const struct nv_device_info *info,
       .KHR_external_semaphore = true,
       .KHR_external_semaphore_fd = true,
       .KHR_format_feature_flags2 = true,
+      .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A &&
+         (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
       .KHR_get_memory_requirements2 = true,
       .KHR_image_format_list = true,
       .KHR_imageless_framebuffer = true,
@@ -254,6 +256,10 @@ nvk_get_device_features(const struct nv_device_info *info,
       .dynamicRendering = true,
       .maintenance4 = true,
 
+      /* VK_KHR_fragment_shader_barycentric */
+      .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A &&
+         (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
+
       /* VK_KHR_pipeline_executable_properties */
       .pipelineExecutableInfo = true,
 
@@ -659,6 +665,9 @@ nvk_get_device_properties(const struct nvk_instance *instance,
 
       /* VK_EXT_vertex_attribute_divisor */
       .maxVertexAttribDivisor = UINT32_MAX,
+
+      /* VK_KHR_fragment_shader_barycentric */
+      .triStripVertexOrderIndependentOfProvokingVertex = false,
    };
 
    snprintf(properties->deviceName, sizeof(properties->deviceName),
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index 8e09eee10e7..38d136b0bf6 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -132,6 +132,7 @@ nvk_physical_device_spirv_options(const struct nvk_physical_device *pdev,
          .descriptor_indexing = true,
          .device_group = true,
          .draw_parameters = true,
+         .fragment_barycentric = true,
          .geometry_streams = true,
          .image_read_without_format = true,
          .image_write_without_format = true,

Reply via email to