Extend GpuMm with the remaining two memory-management components:

- Buddy allocator for VRAM allocation.
- TLB manager for translation buffer operations.

PRAMIN was added in an earlier commit; this completes the centralized
ownership model with accessor methods for each component.

Signed-off-by: Joel Fernandes <[email protected]>
---
 drivers/gpu/nova-core/Kconfig         |   1 +
 drivers/gpu/nova-core/gpu.rs          |  22 ++++-
 drivers/gpu/nova-core/gsp/commands.rs |   1 -
 drivers/gpu/nova-core/mm.rs           |  27 ++++++
 drivers/gpu/nova-core/mm/tlb.rs       | 130 ++++++++++++++++++++++++++
 drivers/gpu/nova-core/regs.rs         |  65 +++++++++++++
 6 files changed, 244 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nova-core/mm/tlb.rs

diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig
index abf10e82647b..8eebb430856a 100644
--- a/drivers/gpu/nova-core/Kconfig
+++ b/drivers/gpu/nova-core/Kconfig
@@ -5,6 +5,7 @@ config NOVA_CORE
        depends on RUST
        depends on !CPU_BIG_ENDIAN
        select AUXILIARY_BUS
+       select GPU_BUDDY
        select RUST_FW_LOADER_ABSTRACTIONS
        default n
        help
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index aa047fe91054..f789d956cc49 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -4,11 +4,16 @@
     device,
     devres::Devres,
     fmt,
+    gpu::buddy::GpuBuddyParams,
     io::Io,
     num::Bounded,
     pci,
     prelude::*,
-    sizes::SizeConstants,
+    ptr::Alignment,
+    sizes::{
+        SizeConstants,
+        SZ_4K, //
+    },
     sync::Arc, //
 };
 
@@ -305,6 +310,13 @@ pub(crate) fn new<'a>(
             gsp_static_info: gsp
                 .boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)
                 .inspect(|info| {
+                    dev_info!(
+                        pdev.as_ref(),
+                        "Using FB region: {:#x}..{:#x}\n",
+                        info.usable_fb_region.start,
+                        info.usable_fb_region.end
+                    );
+
                     dev_info!(
                         pdev.as_ref(),
                         "Total physical VRAM: {} MiB\n",
@@ -314,14 +326,22 @@ pub(crate) fn new<'a>(
 
             // Create GPU memory manager owning memory management resources.
             mm: {
+                let usable_vram = &gsp_static_info.usable_fb_region;
+
                 // PRAMIN covers all physical VRAM (including GSP-reserved areas
                 // above the usable region, e.g. the BAR1 page directory).
                 let pramin_vram_region = (0..gsp_static_info.total_fb_end).into_vram_range();
+                let buddy_params = GpuBuddyParams {
+                    base_offset: usable_vram.start,
+                    size: usable_vram.end - usable_vram.start,
+                    chunk_size: Alignment::new::<SZ_4K>(),
+                };
                 Arc::pin_init(
                     GpuMm::new(
                         devres_bar.clone(),
                         pdev.as_ref(),
                         spec.chipset,
+                        buddy_params,
                         pramin_vram_region,
                     )?,
                     GFP_KERNEL,
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
index 172411d7b475..5abd7950320b 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -194,7 +194,6 @@ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
 pub(crate) struct GetGspStaticInfoReply {
     gpu_name: [u8; 64],
     /// Usable FB (VRAM) region for driver memory allocation.
-    #[expect(dead_code)]
     pub(crate) usable_fb_region: Range<u64>,
     /// End of VRAM.
     pub(crate) total_fb_end: u64,
diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs
index b23667a55ecd..ea415a88b221 100644
--- a/drivers/gpu/nova-core/mm.rs
+++ b/drivers/gpu/nova-core/mm.rs
@@ -32,6 +32,7 @@ macro_rules! impl_pfn_bounded {
 }
 
 pub(crate) mod pramin;
+pub(super) mod tlb;
 
 use core::ops::Range;
 
@@ -39,6 +40,10 @@ macro_rules! impl_pfn_bounded {
     bitfield,
     device,
     devres::Devres,
+    gpu::buddy::{
+        GpuBuddy,
+        GpuBuddyParams, //
+    },
     num::Bounded,
     pci,
     prelude::*,
@@ -51,14 +56,21 @@ macro_rules! impl_pfn_bounded {
     gpu::Chipset, //
 };
 
+pub(crate) use tlb::Tlb;
+
 /// GPU Memory Manager - owns all core MM components.
 ///
 /// Provides centralized ownership of memory management resources:
+/// - [`GpuBuddy`] allocator for VRAM page table allocation.
 /// - [`pramin::Pramin`] for direct VRAM access.
+/// - [`Tlb`] manager for translation buffer flush operations.
 #[pin_data]
 pub(crate) struct GpuMm {
+    buddy: GpuBuddy,
     #[pin]
     pramin: pramin::Pramin,
+    #[pin]
+    tlb: Tlb,
 }
 
 impl GpuMm {
@@ -70,19 +82,34 @@ pub(crate) fn new(
         bar: Arc<Devres<Bar0>>,
         dev: &device::Device<device::Bound>,
         chipset: Chipset,
+        buddy_params: GpuBuddyParams,
         pramin_vram_region: Range<VramAddress>,
     ) -> Result<impl PinInit<Self>> {
+        let buddy = GpuBuddy::new(buddy_params)?;
+        let tlb_init = Tlb::new(bar.clone());
         let pramin_init = pramin::Pramin::new(bar, dev, chipset, pramin_vram_region)?;
 
         Ok(pin_init!(Self {
+            buddy,
             pramin <- pramin_init,
+            tlb <- tlb_init,
         }))
     }
 
+    /// Access the [`GpuBuddy`] allocator.
+    pub(crate) fn buddy(&self) -> &GpuBuddy {
+        &self.buddy
+    }
+
     /// Access the [`pramin::Pramin`].
     pub(crate) fn pramin(&self) -> &pramin::Pramin {
         &self.pramin
     }
+
+    /// Access the [`Tlb`] manager.
+    pub(crate) fn tlb(&self) -> &Tlb {
+        &self.tlb
+    }
 }
 
 /// Run MM subsystem self-tests during probe.
diff --git a/drivers/gpu/nova-core/mm/tlb.rs b/drivers/gpu/nova-core/mm/tlb.rs
new file mode 100644
index 000000000000..1c4f8944a01b
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/tlb.rs
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! TLB (Translation Lookaside Buffer) flush support for GPU MMU.
+//!
+//! After modifying page table entries, the GPU's TLB must be flushed to
+//! ensure the new mappings take effect. This module provides TLB flush
+//! functionality for virtual memory managers.
+//!
+//! # Examples
+//!
+//! ```ignore
+//! use crate::mm::tlb::Tlb;
+//!
+//! fn page_table_update(
+//!     dev: &device::Device<device::Bound>,
+//!     tlb: &Tlb,
+//!     pdb_addr: VramAddress,
+//! ) -> Result<()> {
+//!     // ... modify page tables ...
+//!
+//!     // Flush TLB to make changes visible (polls for completion).
+//!     tlb.flush(dev, pdb_addr)?;
+//!
+//!     Ok(())
+//! }
+//! ```
+
+use kernel::{
+    device,
+    devres::Devres,
+    io::poll::read_poll_timeout,
+    io::Io,
+    new_mutex,
+    prelude::*,
+    sync::{
+        Arc,
+        Mutex, //
+    },
+    time::Delta, //
+};
+
+use crate::{
+    bounded_enum,
+    driver::Bar0,
+    mm::VramAddress,
+    regs, //
+};
+
+bounded_enum! {
+    /// TLB invalidation acknowledgment scope.
+    ///
+    /// Controls how far the hardware waits for the invalidation to propagate
+    /// before clearing the `trigger` bit of `NV_TLB_FLUSH_CTRL`.
+    #[derive(Debug, Copy, Clone, PartialEq, Eq)]
+    pub(crate) enum TlbAckMode with TryFrom<Bounded<u32, 2>> {
+        /// Fire-and-forget: no acknowledgment required.
+        None = 0,
+        /// Wait for acknowledgment from all consumers, including remote GPUs
+        /// reachable over NVLink.
+        ///
+        /// Globally is strictly required only during unmap or permission
+        /// tightening, because the backing memory may be reassigned after the
+        /// flush returns and a stale TLB entry could let the GPU access freed
+        /// memory. For new mapping or relaxing permissions, a stale entry would
+        /// merely cause a redundant fault and retry, so [`TlbAckMode::None`]
+        /// would suffice.
+        Globally = 1,
+        /// Wait for acknowledgment from consumers within the local NVLink
+        /// fabric node only; skip cross-node ack.
+        Intranode = 2,
+    }
+}
+
+/// TLB manager for GPU translation buffer operations.
+#[pin_data]
+pub(crate) struct Tlb {
+    bar: Arc<Devres<Bar0>>,
+    /// TLB flush serialization lock: This lock is designed to be acquired during
+    /// the DMA fence signalling critical path. It should NEVER be held across any
+    /// reclaimable CPU memory allocations because the memory reclaim path can
+    /// call `dma_fence_wait()` (when implemented), which would deadlock if lock held.
+    #[pin]
+    lock: Mutex<()>,
+}
+
+impl Tlb {
+    /// Create a new TLB manager.
+    pub(super) fn new(bar: Arc<Devres<Bar0>>) -> impl PinInit<Self> {
+        pin_init!(Self {
+            bar,
+            lock <- new_mutex!((), "tlb_flush"),
+        })
+    }
+
+    /// Flush the GPU TLB for a specific page directory base.
+    ///
+    /// This invalidates all TLB entries associated with the given PDB address.
+    /// Must be called after modifying page table entries to ensure the GPU sees
+    /// the updated mappings.
+    pub(super) fn flush(
+        &self,
+        dev: &device::Device<device::Bound>,
+        pdb_addr: VramAddress,
+    ) -> Result {
+        let _guard = self.lock.lock();
+        let bar = self.bar.access(dev)?;
+
+        // Write PDB address.
+        bar.write_reg(regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw()));
+        bar.write_reg(regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw()));
+
+        // Trigger flush.
+        bar.write_reg(
+            regs::NV_TLB_FLUSH_CTRL::zeroed()
+                .with_all_va(true)
+                .with_ack(TlbAckMode::None)
+                .with_trigger(true),
+        );
+
+        // Poll for completion.
+        read_poll_timeout(
+            || Ok(bar.read(regs::NV_TLB_FLUSH_CTRL)),
+            |ctrl: &regs::NV_TLB_FLUSH_CTRL| !ctrl.trigger(),
+            Delta::ZERO,
+            Delta::from_secs(2),
+        )?;
+
+        Ok(())
+    }
+}
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index fb42d96a59b2..277eb1a064f7 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -37,6 +37,7 @@
     },
     mm::{
         pramin::Bar0WindowTarget,
+        tlb::TlbAckMode,
         VramAddress, //
     },
 };
@@ -659,3 +660,67 @@ pub(crate) fn pramin_window_write_base(
         }
     }
 }
+
+// MMU TLB
+
+register! {
+    /// TLB flush register: PDB address lower bits.
+    pub(crate) NV_TLB_FLUSH_PDB_LO(u32) @ 0x00b830a0 {
+        /// PDB address bits [39:8].
+        31:0    pdb_lo => u32;
+    }
+
+    /// TLB flush register: PDB address higher bits.
+    pub(crate) NV_TLB_FLUSH_PDB_HI(u32) @ 0x00b830a4 {
+        /// PDB address bits [47:40].
+        7:0     pdb_hi => u8;
+    }
+
+    /// TLB flush control register.
+    pub(crate) NV_TLB_FLUSH_CTRL(u32) @ 0x00b830b0 {
+        /// Invalidate every VA in the PDB selected by `NV_TLB_FLUSH_PDB_LO/HI`.
+        0:0     all_va => bool;
+        /// Invalidate TLBs for all PDBs (ignores `NV_TLB_FLUSH_PDB_LO/HI`).
+        1:1     all_pdb => bool;
+        /// Restrict the flush to the HUB MMU's TLBs; skip broadcasting to the
+        /// per-GPC L2 TLBs.
+        ///
+        /// The GPU MMU has a two-level TLB hierarchy:
+        /// 1. The *HUB MMU* sits at the top and serves memory requests from
+        ///    "host-side" engines: the host/channel interface, copy engines,
+        ///    display, and BAR1/BAR2 accesses.
+        /// 2. Each GPC (Graphics Processing Cluster — the block that houses
+        ///    shader cores / SMs) has its own L2 TLB that serves requests from
+        ///    the compute and graphics engines inside the cluster.
+        ///
+        /// When set, only the HUB TLBs are invalidated. This is a performance
+        /// optimization for flushes that only affect HUB-side mappings (e.g.
+        /// BAR1/BAR2 windows), where fanning the invalidation out to every
+        /// GPC's L2 TLB would be wasted work. Must be false when flushing
+        /// mappings that may be cached by compute/graphics engines.
+        2:2     hubtlb_only => bool;
+        /// Invalidation acknowledgment scope. See [`TlbAckMode`] for details.
+        8:7     ack ?=> TlbAckMode;
+        /// Write 1 to kick off the flush. Hardware clears this bit when the
+        /// flush completes; reads as 1 while the flush is in progress.
+        31:31   trigger => bool;
+    }
+}
+
+impl NV_TLB_FLUSH_PDB_LO {
+    /// Create a register value from a PDB address.
+    ///
+    /// Extracts bits [39:8] of the address and shifts it right by 8 bits.
+    pub(crate) fn from_pdb_addr(addr: u64) -> Self {
+        Self::zeroed().with_pdb_lo(((addr >> 8) & 0xFFFF_FFFF) as u32)
+    }
+}
+
+impl NV_TLB_FLUSH_PDB_HI {
+    /// Create a register value from a PDB address.
+    ///
+    /// Extracts bits [47:40] of the address and shifts it right by 40 bits.
+    pub(crate) fn from_pdb_addr(addr: u64) -> Self {
+        Self::zeroed().with_pdb_hi(((addr >> 40) & 0xFF) as u8)
+    }
+}
-- 
2.34.1

Reply via email to