PRAMIN apertures are a crucial mechanism for direct CPU read/write to
VRAM. Add support for PRAMIN windows on all supported GPU architectures:
Turing, Ampere, Ada (via `NV_PBUS_BAR0_WINDOW`), Hopper (via
`gh100::NV_XAL_EP_BAR0_WINDOW`), and Blackwell (via
`gb100::NV_XAL_EP_BAR0_WINDOW`). Architecture-dispatched
`pramin_window_{read,write}_base()` helpers in `regs.rs` encapsulate the
per-arch register selection.

Hopper/Blackwell window-base dispatch is based on Eliot Courtney's off-list reference patch.

Cc: Eliot Courtney <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
drivers/gpu/nova-core/mm.rs | 2 + drivers/gpu/nova-core/mm/pramin.rs | 298 +++++++++++++++++++++++++++++ drivers/gpu/nova-core/nova_core.rs | 1 + drivers/gpu/nova-core/regs.rs | 122 ++++++++++++ 4 files changed, 423 insertions(+) create mode 100644 drivers/gpu/nova-core/mm/pramin.rs diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs index 3bc9befab397..f425467281d3 100644 --- a/drivers/gpu/nova-core/mm.rs +++ b/drivers/gpu/nova-core/mm.rs @@ -31,6 +31,8 @@ macro_rules! impl_pfn_bounded { }; } +pub(crate) mod pramin; + use core::ops::Range; use kernel::{ diff --git a/drivers/gpu/nova-core/mm/pramin.rs b/drivers/gpu/nova-core/mm/pramin.rs new file mode 100644 index 000000000000..38758ca971be --- /dev/null +++ b/drivers/gpu/nova-core/mm/pramin.rs @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Direct VRAM access through the PRAMIN aperture. +//! +//! PRAMIN provides a 1MB sliding window into VRAM through BAR0, allowing the CPU to access +//! video memory directly. Access is managed through a two-level API: +//! +//! - [`Pramin`]: The parent object that owns the BAR0 reference and synchronization lock. +//! - [`PraminWindow`]: A guard object that holds exclusive PRAMIN access for its lifetime. +//! +//! The PRAMIN aperture is a 1MB region at a fixed offset from BAR0. The window base is +//! controlled by an architecture-specific register and is 64KB aligned. +//! +//! # Examples +//! +//! ## Basic read/write +//! +//! ```no_run +//! use crate::driver::Bar0; +//! use crate::gpu::Chipset; +//! use crate::mm::{pramin, VramAddress}; +//! use kernel::device; +//! use kernel::devres::Devres; +//! use kernel::prelude::*; +//! use kernel::sync::Arc; +//! +//! fn example( +//! devres_bar: Arc<Devres<Bar0>>, +//! 
dev: &device::Device<device::Bound>, +//! chipset: Chipset, +//! vram_region: core::ops::Range<VramAddress>, +//! ) -> Result<()> { +//! let pramin = Arc::pin_init( +//! pramin::Pramin::new(devres_bar, dev, chipset, vram_region)?, +//! GFP_KERNEL, +//! )?; +//! let mut window = pramin.get_window(dev)?; +//! +//! // Write and read back. +//! window.try_write32(0x100u64, 0xDEADBEEF)?; +//! let val = window.try_read32(0x100u64)?; +//! assert_eq!(val, 0xDEADBEEF); +//! +//! Ok(()) +//! } +//! ``` +//! +//! ## Auto-repositioning across VRAM regions +//! +//! ```no_run +//! use crate::driver::Bar0; +//! use crate::gpu::Chipset; +//! use crate::mm::{pramin, VramAddress}; +//! use kernel::device; +//! use kernel::devres::Devres; +//! use kernel::prelude::*; +//! use kernel::sync::Arc; +//! +//! fn example( +//! devres_bar: Arc<Devres<Bar0>>, +//! dev: &device::Device<device::Bound>, +//! chipset: Chipset, +//! vram_region: core::ops::Range<VramAddress>, +//! ) -> Result<()> { +//! let pramin = Arc::pin_init( +//! pramin::Pramin::new(devres_bar, dev, chipset, vram_region)?, +//! GFP_KERNEL, +//! )?; +//! let mut window = pramin.get_window(dev)?; +//! +//! // Access first 1MB region. +//! window.try_write32(0x100u64, 0x11111111)?; +//! +//! // Access at 2MB - window auto-repositions. +//! window.try_write32(0x200000u64, 0x22222222)?; +//! +//! // Back to first region - window repositions again. +//! let val = window.try_read32(0x100u64)?; +//! assert_eq!(val, 0x11111111); +//! +//! Ok(()) +//! } +//! ``` + +#![expect(unused)] + +use core::ops::Range; + +use crate::{ + bounded_enum, + driver::Bar0, + gpu::Chipset, + mm::VramAddress, + num::IntoSafeCast, + regs, // +}; + +use kernel::{ + device, + devres::Devres, + io::Io, + new_mutex, + prelude::*, + sizes::{ + SZ_1M, + SZ_64K, // + }, + sync::{ + lock::mutex::MutexGuard, + Arc, + Mutex, // + }, +}; + +bounded_enum! { + /// Target memory type for the BAR0 window register. 
+ /// + /// Only VRAM is supported; Hopper+ GPUs do not support other targets. + #[derive(Debug)] + pub(crate) enum Bar0WindowTarget with TryFrom<Bounded<u32, 2>> { + /// Video RAM (GPU framebuffer memory). + Vram = 0, + } +} + +/// PRAMIN aperture base offset in BAR0. +const PRAMIN_BASE: usize = 0x700000; + +/// PRAMIN aperture size (1MB). +const PRAMIN_SIZE: usize = SZ_1M; + +/// Generate a PRAMIN read accessor that takes an absolute VRAM address. +/// +/// `$name` matches the underlying [`Bar0`] method (e.g. `try_read32`). +macro_rules! define_pramin_read { + ($name:ident, $ty:ty) => { + #[doc = concat!("Read a `", stringify!($ty), "` from VRAM at the given address.")] + pub(crate) fn $name(&mut self, vram_addr: impl Into<VramAddress>) -> Result<$ty> { + let (bar_offset, new_base) = + self.compute_window(vram_addr.into(), ::core::mem::size_of::<$ty>())?; + + if let Some(base) = new_base { + regs::pramin_window_write_base(self.chipset.arch(), self.bar, base)?; + *self.state = base; + } + self.bar.$name(bar_offset) + } + }; +} + +/// Generate a PRAMIN write accessor that takes an absolute VRAM address. +/// +/// `$name` matches the underlying [`Bar0`] method (e.g. `try_write32`). +macro_rules! define_pramin_write { + ($name:ident, $ty:ty) => { + #[doc = concat!("Write a `", stringify!($ty), "` to VRAM at the given address.")] + pub(crate) fn $name(&mut self, vram_addr: impl Into<VramAddress>, value: $ty) -> Result { + let (bar_offset, new_base) = + self.compute_window(vram_addr.into(), ::core::mem::size_of::<$ty>())?; + + if let Some(base) = new_base { + regs::pramin_window_write_base(self.chipset.arch(), self.bar, base)?; + *self.state = base; + } + self.bar.$name(value, bar_offset) + } + }; +} + +/// PRAMIN aperture manager. +/// +/// Call [`Pramin::get_window()`] to acquire exclusive PRAMIN access. +#[pin_data] +pub(crate) struct Pramin { + bar: Arc<Devres<Bar0>>, + chipset: Chipset, + /// Valid VRAM region. Accesses outside this range are rejected. 
+ vram_region: Range<VramAddress>, + /// PRAMIN aperture state, protected by a mutex. + /// + /// # Invariants + /// + /// This lock is acquired during the DMA fence signaling critical path. + /// It must NEVER be held across any reclaimable CPU memory / allocations + /// (`GFP_KERNEL`), because the memory reclaim path can call + /// `dma_fence_wait()`, which would deadlock with this lock held. + #[pin] + state: Mutex<VramAddress>, +} + +impl Pramin { + /// Create a pin-initializer for PRAMIN. + /// + /// `vram_region` specifies the valid VRAM address range. + pub(crate) fn new( + bar: Arc<Devres<Bar0>>, + dev: &device::Device<device::Bound>, + chipset: Chipset, + vram_region: Range<VramAddress>, + ) -> Result<impl PinInit<Self>> { + let bar_access = bar.access(dev)?; + let current_base = regs::pramin_window_read_base(chipset.arch(), bar_access); + + Ok(pin_init!(Self { + bar, + chipset, + vram_region, + state <- new_mutex!(current_base, "pramin_state"), + })) + } + + /// Returns the valid VRAM region for this PRAMIN instance. + fn vram_region(&self) -> &Range<VramAddress> { + &self.vram_region + } + + /// Acquire exclusive PRAMIN access. + /// + /// Returns a [`PraminWindow`] guard that provides VRAM read/write accessors. + /// The [`PraminWindow`] is exclusive and only one can exist at a time. + pub(crate) fn get_window<'a>( + &'a self, + dev: &'a device::Device<device::Bound>, + ) -> Result<PraminWindow<'a>> { + let bar = self.bar.access(dev)?; + let state = self.state.lock(); + Ok(PraminWindow { + bar, + chipset: self.chipset, + vram_region: self.vram_region.clone(), + state, + }) + } +} + +/// PRAMIN window guard for direct VRAM access. +/// +/// This guard holds exclusive access to the PRAMIN aperture. The window auto-repositions +/// when accessing VRAM offsets outside the current 1MB range. +/// +/// Only one [`PraminWindow`] can exist at a time per [`Pramin`] instance (enforced by the +/// internal `MutexGuard`). 
+pub(crate) struct PraminWindow<'a> { + bar: &'a Bar0, + chipset: Chipset, + vram_region: Range<VramAddress>, + state: MutexGuard<'a, VramAddress>, +} + +impl PraminWindow<'_> { + /// Compute window parameters for a VRAM access. + /// + /// Returns (`bar_offset`, `new_base`) where: + /// - `bar_offset`: The BAR0 offset to use for the access. + /// - `new_base`: `Some(base)` if window needs repositioning, `None` otherwise. + fn compute_window( + &self, + vram_addr: VramAddress, + access_size: usize, + ) -> Result<(usize, Option<VramAddress>)> { + // Validate VRAM address is within the valid VRAM region. + let end_addr = vram_addr.checked_add(access_size).ok_or(EINVAL)?; + if vram_addr < self.vram_region.start || end_addr > self.vram_region.end { + return Err(EINVAL); + } + + // Check if access fits within the current 1MB window. + let current_base = *self.state; + if vram_addr >= current_base { + let offset_within: usize = (vram_addr - current_base).into_safe_cast(); + if offset_within + access_size <= PRAMIN_SIZE { + return Ok((PRAMIN_BASE + offset_within, None)); + } + } + + // Access doesn't fit in current window - reposition. + // Hardware requires 64KB alignment for the window base register. + let needed_base = vram_addr.align_down(SZ_64K as u64); + let offset_within: usize = (vram_addr - needed_base).into_safe_cast(); + + // Verify access fits in the 1MB window from the new base. 
+ if offset_within + access_size > PRAMIN_SIZE { + return Err(EINVAL); + } + + Ok((PRAMIN_BASE + offset_within, Some(needed_base))) + } + + define_pramin_read!(try_read8, u8); + define_pramin_read!(try_read16, u16); + define_pramin_read!(try_read32, u32); + define_pramin_read!(try_read64, u64); + + define_pramin_write!(try_write8, u8); + define_pramin_write!(try_write16, u16); + define_pramin_write!(try_write32, u32); + define_pramin_write!(try_write64, u64); +} diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 38b8aeb750ba..8bff10dbf327 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -16,6 +16,7 @@ mod firmware; mod gpu; mod gsp; +mod mm; #[macro_use] mod num; mod regs; diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 6faeed73901d..fb42d96a59b2 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -6,6 +6,10 @@ register::WithBase, Io, // }, + num::{ + Bounded, + TryIntoBounded, // + }, prelude::*, sizes::SizeConstants, time, // @@ -31,6 +35,10 @@ Architecture, Chipset, // }, + mm::{ + pramin::Bar0WindowTarget, + VramAddress, // + }, }; // PMC @@ -115,6 +123,15 @@ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result { } } +register! { + /// BAR0 window control for PRAMIN access. + pub(crate) NV_PBUS_BAR0_WINDOW(u32) @ 0x00001700 { + 25:24 target ?=> Bar0WindowTarget; + /// PRAMIN window base byte address (40-bit FB addr; bits 39:16 stored in 23:0). + 23:0 window_base as Bounded<u64, 40> shl 16; + } +} + // PFB register! { @@ -537,3 +554,108 @@ pub(crate) mod ga100 { } } } + +pub(crate) mod gh100 { + use kernel::io::register; + + register! { + /// Hopper register for PRAMIN window. + pub(crate) NV_XAL_EP_BAR0_WINDOW(u32) @ 0x0010_fd40 { + /// PRAMIN window base byte address (38-bit FB addr; bits 37:16 stored in 21:0). 
+ 21:0 window_base as Bounded<u64, 38> shl 16; + } + } +} + +pub(crate) mod gb100 { + use kernel::io::register; + + register! { + /// Blackwell+ register for PRAMIN window. + pub(crate) NV_XAL_EP_BAR0_WINDOW(u32) @ 0x0010_fd40 { + /// PRAMIN window base byte address (39-bit FB addr; bits 38:16 stored in 22:0). + 22:0 window_base as Bounded<u64, 39> shl 16; + } + } +} + +/// Common interface for all PRAMIN window registers across GPU architectures. +pub(crate) trait PraminWindow { + /// Reads the current PRAMIN window base address from this register. + fn read_base(bar: &Bar0) -> VramAddress; + + /// Writes a new PRAMIN window base address into this register. + fn write_base(bar: &Bar0, base: VramAddress) -> Result; +} + +impl PraminWindow for NV_PBUS_BAR0_WINDOW { + fn read_base(bar: &Bar0) -> VramAddress { + VramAddress::new(bar.read(NV_PBUS_BAR0_WINDOW).window_base().into()) + } + + fn write_base(bar: &Bar0, base: VramAddress) -> Result { + let bounded: Bounded<u64, 40> = base.raw().try_into_bounded().ok_or(EINVAL)?; + bar.write_reg( + NV_PBUS_BAR0_WINDOW::zeroed() + .with_target(Bar0WindowTarget::Vram) + .with_window_base(bounded), + ); + Ok(()) + } +} + +impl PraminWindow for gh100::NV_XAL_EP_BAR0_WINDOW { + fn read_base(bar: &Bar0) -> VramAddress { + VramAddress::new(bar.read(gh100::NV_XAL_EP_BAR0_WINDOW).window_base().into()) + } + + fn write_base(bar: &Bar0, base: VramAddress) -> Result { + let bounded: Bounded<u64, 38> = base.raw().try_into_bounded().ok_or(EINVAL)?; + bar.write_reg(gh100::NV_XAL_EP_BAR0_WINDOW::zeroed().with_window_base(bounded)); + Ok(()) + } +} + +impl PraminWindow for gb100::NV_XAL_EP_BAR0_WINDOW { + fn read_base(bar: &Bar0) -> VramAddress { + VramAddress::new(bar.read(gb100::NV_XAL_EP_BAR0_WINDOW).window_base().into()) + } + + fn write_base(bar: &Bar0, base: VramAddress) -> Result { + let bounded: Bounded<u64, 39> = base.raw().try_into_bounded().ok_or(EINVAL)?; + 
bar.write_reg(gb100::NV_XAL_EP_BAR0_WINDOW::zeroed().with_window_base(bounded)); + Ok(()) + } +} + +/// Reads the current BAR0 PRAMIN window base address, dispatching to the +/// register variant appropriate for `arch`. +pub(crate) fn pramin_window_read_base(arch: Architecture, bar: &Bar0) -> VramAddress { + match arch { + Architecture::Turing | Architecture::Ampere | Architecture::Ada => { + NV_PBUS_BAR0_WINDOW::read_base(bar) + } + Architecture::Hopper => gh100::NV_XAL_EP_BAR0_WINDOW::read_base(bar), + Architecture::BlackwellGB10x | Architecture::BlackwellGB20x => { + gb100::NV_XAL_EP_BAR0_WINDOW::read_base(bar) + } + } +} + +/// Writes a new BAR0 PRAMIN window base address, dispatching to the register +/// variant appropriate for `arch`. +pub(crate) fn pramin_window_write_base( + arch: Architecture, + bar: &Bar0, + base: VramAddress, +) -> Result { + match arch { + Architecture::Turing | Architecture::Ampere | Architecture::Ada => { + NV_PBUS_BAR0_WINDOW::write_base(bar, base) + } + Architecture::Hopper => gh100::NV_XAL_EP_BAR0_WINDOW::write_base(bar, base), + Architecture::BlackwellGB10x | Architecture::BlackwellGB20x => { + gb100::NV_XAL_EP_BAR0_WINDOW::write_base(bar, base) + } + } +} -- 2.34.1
