Add the BAR1 user interface for CPU access to GPU virtual memory through
the BAR1 aperture.

Signed-off-by: Joel Fernandes <[email protected]>
---
 drivers/gpu/nova-core/driver.rs       |  22 ++-
 drivers/gpu/nova-core/gpu.rs          |  41 +++++-
 drivers/gpu/nova-core/gsp/commands.rs |   1 -
 drivers/gpu/nova-core/mm.rs           |   1 +
 drivers/gpu/nova-core/mm/bar_user.rs  | 194 ++++++++++++++++++++++++++
 5 files changed, 255 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/nova-core/mm/bar_user.rs

diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index b14d4b599783..207ba164cf4e 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -2,10 +2,12 @@
 
 use kernel::{
     auxiliary,
+    device::Bound,
     device::Core,
     devres::Devres,
     dma::Device,
     dma::DmaMask,
+    io::resource,
     pci,
     pci::{
         Class,
@@ -47,9 +49,27 @@ pub(crate) struct NovaCore {
 const GPU_DMA_BITS: u32 = 47;
 
 pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
-#[expect(dead_code)]
 pub(crate) type Bar1 = pci::Bar;
 
+/// Returns the Linux PCI resource index that holds BAR1 for an NVIDIA GPU.
+///
+/// On Maxwell through Ada, BAR0 is a 32-bit memory BAR occupying a single
+/// Linux PCI resource slot, so BAR1 lives at index 1. Starting with Blackwell
+/// (and on some Ampere GA100 / Hopper SKUs) BAR0 is a 64-bit memory BAR that
+/// consumes two consecutive resource slots: index 0 holds the low 32 bits and
+/// index 1 holds the high 32 bits (with no `flags` or size of its own),
+/// shifting BAR1 to index 2.
+pub(crate) fn bar1_resource_index(pdev: &pci::Device<Bound>) -> Result<u32> {
+    // Probe the `IORESOURCE_MEM_64` flag of BAR0: if it is set, BAR0 is a
+    // 64-bit BAR spanning two resource slots, so BAR1 starts at index 2.
+    let flags0 = pdev.resource_flags(0)?;
+    if flags0.contains(resource::Flags::IORESOURCE_MEM_64) {
+        Ok(2)
+    } else {
+        Ok(1)
+    }
+}
+
 kernel::pci_device_table!(
     PCI_TABLE,
     MODULE_PCI_TABLE,
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index f789d956cc49..b0eebe6406e5 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -19,7 +19,10 @@
 
 use crate::{
     bounded_enum,
-    driver::Bar0,
+    driver::{
+        Bar0,
+        Bar1, //
+    },
     falcon::{
         gsp::Gsp as GspFalcon,
         sec2::Sec2 as Sec2Falcon,
@@ -31,8 +34,11 @@
         Gsp, //
     },
     mm::{
+        bar_user::BarUser,
+        pagetable::MmuVersion,
         GpuMm,
-        IntoVramRange, //
+        IntoVramRange,
+        VramAddress, //
     },
     regs,
 };
@@ -145,6 +151,11 @@ pub(crate) const fn arch(self) -> Architecture {
     pub(crate) const fn needs_fwsec_bootloader(self) -> bool {
         matches!(self.arch(), Architecture::Turing) || matches!(self, Self::GA100)
     }
+
+    /// Returns the [`MmuVersion`] derived from this chipset's architecture.
+    pub(crate) fn mmu_version(self) -> MmuVersion {
+        MmuVersion::from(self.arch())
+    }
 }
 
 // TODO
@@ -263,6 +274,8 @@ pub(crate) struct Gpu {
     spec: Spec,
     /// MMIO mapping of PCI BAR 0
     bar: Arc<Devres<Bar0>>,
+    /// MMIO mapping of PCI BAR 1.
+    bar1: Arc<Devres<Bar1>>,
     /// System memory page required for flushing all pending GPU-side memory writes done through
     /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation).
     sysmem_flush: SysmemFlush,
@@ -276,6 +289,8 @@ pub(crate) struct Gpu {
     #[pin]
     gsp: Gsp,
     gsp_static_info: GetGspStaticInfoReply,
+    /// BAR1 user interface for CPU access to GPU virtual memory.
+    bar_user: Arc<BarUser>,
 }
 
 impl Gpu {
@@ -348,6 +363,28 @@ pub(crate) fn new<'a>(
                 )?
             },
 
+            bar1: {
+                let bar1_idx = crate::driver::bar1_resource_index(pdev)?;
+                Arc::pin_init(pdev.iomap_region(bar1_idx, c"nova-core/bar1"), GFP_KERNEL)?
+            },
+
+            // Create BAR1 user interface for CPU access to GPU virtual memory.
+            bar_user: {
+                let pdb_addr = VramAddress::new(gsp_static_info.bar1_pde_base);
+                let bar1_idx = crate::driver::bar1_resource_index(pdev)?;
+                let bar1_size = pdev.resource_len(bar1_idx)?;
+                Arc::pin_init(
+                    BarUser::new(
+                        pdb_addr,
+                        spec.chipset,
+                        bar1_size,
+                        mm.clone(),
+                        bar1.clone(),
+                    )?,
+                    GFP_KERNEL,
+                )?
+            },
+
             bar: devres_bar,
         })
     }
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
index bee7539eff60..301c95686efd 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -194,7 +194,6 @@ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
 pub(crate) struct GetGspStaticInfoReply {
     gpu_name: [u8; 64],
     /// BAR1 Page Directory Entry base address.
-    #[expect(dead_code)]
     pub(crate) bar1_pde_base: u64,
     /// Usable FB (VRAM) region for driver memory allocation.
     pub(crate) usable_fb_region: Range<u64>,
diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs
index 502c7fdceba2..4741ef60593b 100644
--- a/drivers/gpu/nova-core/mm.rs
+++ b/drivers/gpu/nova-core/mm.rs
@@ -31,6 +31,7 @@ macro_rules! impl_pfn_bounded {
     };
 }
 
+pub(crate) mod bar_user;
 pub(super) mod pagetable;
 pub(crate) mod pramin;
 pub(super) mod tlb;
diff --git a/drivers/gpu/nova-core/mm/bar_user.rs b/drivers/gpu/nova-core/mm/bar_user.rs
new file mode 100644
index 000000000000..bb9742c036b7
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/bar_user.rs
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! BAR1 user interface for CPU access to GPU virtual memory. Used for USERD
+//! (GPU work submission) and by applications accessing GPU buffers via mmap().
+
+use kernel::{
+    device,
+    devres::Devres,
+    io::Io,
+    new_mutex,
+    prelude::*,
+    sync::{
+        Arc,
+        Mutex, //
+    },
+};
+
+use crate::{
+    driver::Bar1,
+    gpu::Chipset,
+    mm::{
+        vmm::{
+            MappedRange,
+            Vmm, //
+        },
+        GpuMm,
+        Pfn,
+        Vfn,
+        VirtualAddress,
+        VramAddress,
+        PAGE_SIZE, //
+    },
+    num::IntoSafeCast,
+};
+
+/// BAR1 user interface for virtual memory mappings.
+///
+/// Owns the mutex-protected [`Vmm`] for the BAR1 address space.
+#[pin_data]
+pub(crate) struct BarUser {
+    #[pin]
+    vmm: Mutex<Vmm>,
+    mm: Arc<GpuMm>,
+    bar1: Arc<Devres<Bar1>>,
+}
+
+impl BarUser {
+    /// Create a pin-initializer for [`BarUser`]; fails if the [`Vmm`] cannot be created.
+    pub(crate) fn new(
+        pdb_addr: VramAddress,
+        chipset: Chipset,
+        va_size: u64,
+        mm: Arc<GpuMm>,
+        bar1: Arc<Devres<Bar1>>,
+    ) -> Result<impl PinInit<Self>> {
+        let vmm = Vmm::new(pdb_addr, chipset.mmu_version(), va_size)?;
+        Ok(pin_init!(Self {
+            vmm <- new_mutex!(vmm, "bar_user_vmm"),
+            mm,
+            bar1,
+        }))
+    }
+
+    /// Map `pfns` to a contiguous BAR1 virtual range; fails with `EINVAL` if `pfns` is empty.
+    pub(crate) fn map(
+        self: &Arc<Self>,
+        dev: &device::Device<device::Bound>,
+        pfns: &[Pfn],
+        writable: bool,
+    ) -> Result<BarUserAccess> {
+        if pfns.is_empty() {
+            return Err(EINVAL);
+        }
+        let mut vmm = self.vmm.lock();
+        let mapped = vmm.map_pages(dev, &self.mm, pfns, None, writable)?;
+
+        Ok(BarUserAccess {
+            bar_user: self.clone(),
+            mapped: Some(mapped),
+        })
+    }
+}
+
+/// Access object for a mapped BAR1 region.
+pub(crate) struct BarUserAccess {
+    bar_user: Arc<BarUser>,
+    /// Active mapping; taken (left as `None`) by [`BarUserAccess::release`].
+    /// `Some` at drop time means `release()` was never called.
+    mapped: Option<MappedRange>,
+}
+
+impl BarUserAccess {
+    /// Tear down the BAR1 mapping using a caller-supplied bound device.
+    pub(crate) fn release(mut self, dev: &device::Device<device::Bound>) -> Result {
+        let mapped = self.mapped.take().ok_or(EINVAL)?;
+        let mut vmm = self.bar_user.vmm.lock();
+        vmm.unmap_pages(dev, &self.bar_user.mm, mapped)?;
+        Ok(())
+    }
+
+    /// Returns the active mapping.
+    fn mapped(&self) -> &MappedRange {
+        // `mapped` only becomes `None` in `release()`, which consumes `self`,
+        // so no accessor can run afterwards and this unwrap() cannot panic.
+        self.mapped.as_ref().unwrap()
+    }
+
+    /// Get the base virtual address of this mapping.
+    pub(crate) fn base(&self) -> VirtualAddress {
+        VirtualAddress::from(self.mapped().vfn_start)
+    }
+
+    /// Get the total size of the mapped region in bytes.
+    pub(crate) fn size(&self) -> usize {
+        self.mapped().num_pages * PAGE_SIZE
+    }
+
+    /// Get the starting virtual frame number.
+    pub(crate) fn vfn_start(&self) -> Vfn {
+        self.mapped().vfn_start
+    }
+
+    /// Get the number of pages in this mapping.
+    pub(crate) fn num_pages(&self) -> usize {
+        self.mapped().num_pages
+    }
+
+    /// Translate a mapping-relative offset to a BAR1 aperture offset (`EINVAL` if out of range).
+    fn bar_offset(&self, offset: usize) -> Result<usize> {
+        if offset >= self.size() {
+            return Err(EINVAL);
+        }
+
+        let base_vfn: usize = self.mapped().vfn_start.raw().into_safe_cast();
+        let base = base_vfn.checked_mul(PAGE_SIZE).ok_or(EOVERFLOW)?;
+        base.checked_add(offset).ok_or(EOVERFLOW)
+    }
+
+    // Fallible accessors with runtime bounds checking.
+
+    /// Read a 32-bit value at `offset` bytes into this mapping.
+    pub(crate) fn try_read32(
+        &self,
+        dev: &device::Device<device::Bound>,
+        offset: usize,
+    ) -> Result<u32> {
+        let off = self.bar_offset(offset)?;
+        self.bar_user.bar1.access(dev)?.try_read32(off)
+    }
+
+    /// Write a 32-bit value at `offset` bytes into this mapping.
+    pub(crate) fn try_write32(
+        &self,
+        dev: &device::Device<device::Bound>,
+        value: u32,
+        offset: usize,
+    ) -> Result {
+        let off = self.bar_offset(offset)?;
+        self.bar_user.bar1.access(dev)?.try_write32(value, off)
+    }
+
+    /// Read a 64-bit value at `offset` bytes into this mapping.
+    pub(crate) fn try_read64(
+        &self,
+        dev: &device::Device<device::Bound>,
+        offset: usize,
+    ) -> Result<u64> {
+        let off = self.bar_offset(offset)?;
+        self.bar_user.bar1.access(dev)?.try_read64(off)
+    }
+
+    /// Write a 64-bit value at `offset` bytes into this mapping.
+    pub(crate) fn try_write64(
+        &self,
+        dev: &device::Device<device::Bound>,
+        value: u64,
+        offset: usize,
+    ) -> Result {
+        let off = self.bar_offset(offset)?;
+        self.bar_user.bar1.access(dev)?.try_write64(value, off)
+    }
+}
+
+impl Drop for BarUserAccess {
+    fn drop(&mut self) {
+        if self.mapped.is_some() {
+            kernel::pr_warn!(
+                "BarUserAccess dropped without calling release(). BarUser address space will leak.\n"
+            );
+        }
+        // The inner `MappedRange`'s own `MustUnmapGuard` will also fire,
+        // identifying the leaked VA range.
+    }
+}
-- 
2.34.1

Reply via email to