Introduce a kernel module parameter, vgpu_support, to control vGPU support in nova-core. vgpu_support = 1 (default): automatic
The driver automatically enables or disables vGPU support based on if the GPU advertises SRIOV caps. vgpu_support = 0: disabled Explicitly disables vGPU support. The driver will not enable vGPU support regardless. Signed-off-by: Zhi Wang <[email protected]> --- drivers/gpu/nova-core/gpu.rs | 37 +++++++++++++++++-- drivers/gpu/nova-core/gsp.rs | 25 +++++++++++++ drivers/gpu/nova-core/gsp/boot.rs | 59 +++++++++++++++++------------- drivers/gpu/nova-core/nova_core.rs | 15 ++++++++ drivers/gpu/nova-core/vgpu.rs | 37 +++++++++++++++++++ 5 files changed, 143 insertions(+), 30 deletions(-) create mode 100644 drivers/gpu/nova-core/vgpu.rs diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index e9d07750fafe..e1c16e1b9ec4 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -23,8 +23,12 @@ fb::SysmemFlush, fsp::FspCotVersion, gfw, - gsp::Gsp, + gsp::{ + Gsp, + GspBootContext, // + }, regs, + vgpu::Vgpu, // }; macro_rules! define_chipset { @@ -180,6 +184,16 @@ pub(crate) enum Architecture { } impl Architecture { + /// Whether this architecture uses SEC2 for GSP boot (vs FSP Chain of Trust). + pub(crate) const fn uses_sec2_boot(&self) -> bool { + matches!(self, Self::Turing | Self::Ampere | Self::Ada) + } + + /// Whether this architecture supports vGPU. + pub(crate) const fn supports_vgpu(&self) -> bool { + matches!(self, Self::Ada | Self::Blackwell) + } + /// Returns the DMA mask supported by this architecture. /// /// Hopper and Blackwell support 52-bit DMA addresses, while earlier architectures @@ -313,7 +327,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { pub(crate) struct Gpu { spec: Spec, /// MMIO mapping of PCI BAR 0 - bar: Arc<Devres<Bar0>>, + pub bar: Arc<Devres<Bar0>>, /// System memory page required for flushing all pending GPU-side memory writes done through /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). 
sysmem_flush: SysmemFlush, @@ -323,7 +337,8 @@ pub(crate) struct Gpu { sec2_falcon: Falcon<Sec2Falcon>, /// GSP runtime data. Temporarily an empty placeholder. #[pin] - gsp: Gsp, + pub(crate) gsp: Gsp, + vgpu: Vgpu, } impl Gpu { @@ -351,6 +366,8 @@ pub(crate) fn new<'a>( } }, + vgpu: Vgpu::new(pdev, chipset)?, + sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, chipset)?, gsp_falcon: Falcon::new( @@ -363,7 +380,19 @@ pub(crate) fn new<'a>( gsp <- Gsp::new(pdev), - _: { gsp.boot(pdev, bar, chipset, gsp_falcon, sec2_falcon)? }, + _: { + let mut ctx = GspBootContext { + pdev, + bar, + chipset, + gsp_falcon, + sec2_falcon, + fsp_falcon: None, + vgpu_requested: vgpu.vgpu_requested, + }; + gsp.boot(&mut ctx)?; + vgpu.vgpu_enabled = ctx.vgpu_requested; + }, bar: devres_bar, spec, diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 1756ab4732e7..9435c7430dfe 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -31,6 +31,14 @@ }; use crate::{ + driver::Bar0, + falcon::{ + fsp::Fsp as FspFalcon, + gsp::Gsp as GspFalcon, + sec2::Sec2 as Sec2Falcon, + Falcon, // + }, + gpu::Chipset, gsp::cmdq::Cmdq, gsp::fw::{ GspArgumentsPadded, @@ -45,6 +53,23 @@ /// Number of GSP pages to use in a RM log buffer. const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10; +/// Common context for the GSP boot process. +pub(crate) struct GspBootContext<'a> { + pub(crate) pdev: &'a pci::Device<device::Bound>, + pub(crate) bar: &'a Bar0, + pub(crate) chipset: Chipset, + pub(crate) gsp_falcon: &'a Falcon<GspFalcon>, + pub(crate) sec2_falcon: &'a Falcon<Sec2Falcon>, + pub(crate) fsp_falcon: Option<Falcon<FspFalcon>>, + pub(crate) vgpu_requested: bool, +} + +impl GspBootContext<'_> { + pub(crate) fn dev(&self) -> &device::Device<device::Bound> { + self.pdev.as_ref() + } +} + /// Array of page table entries, as understood by the GSP bootloader. 
#[repr(C)] struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]); diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index b2037ecb9a7f..4238df5c8104 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -5,7 +5,6 @@ dma::Coherent, io::poll::read_poll_timeout, io_write, - pci, prelude::*, time::Delta, // }; @@ -36,12 +35,10 @@ }, fsp::{ FmcBootArgs, - Fsp, // - }, - gpu::{ - Architecture, - Chipset, // + Fsp, + VgpuMode, // }, + gpu::Chipset, gsp::{ commands, fw::LibosMemoryRegionInitArgument, @@ -98,7 +95,7 @@ fn lockdown_released(&self, bar: &Bar0, fmc_boot_params_addr: u64) -> bool { return true; } - let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &crate::falcon::gsp::Gsp::ID); + let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &Gsp::ID); !hwcfg2.riscv_br_priv_lockdown() } } @@ -255,9 +252,8 @@ fn boot_via_fsp( gsp_falcon: &Falcon<Gsp>, wpr_meta: &Coherent<GspFwWprMeta>, libos: &Coherent<[LibosMemoryRegionInitArgument]>, + fsp_falcon: &Falcon<FspEngine>, ) -> Result { - let fsp_falcon = Falcon::<FspEngine>::new(dev, chipset)?; - Fsp::wait_secure_boot(dev, bar, chipset.arch())?; let fsp_fw = FspFirmware::new(dev, chipset, FIRMWARE_VERSION)?; @@ -275,7 +271,7 @@ fn boot_via_fsp( &signatures, )?; - Fsp::boot_fmc(dev, bar, &fsp_falcon, &args)?; + Fsp::boot_fmc(dev, bar, fsp_falcon, &args)?; let fmc_boot_params_addr = args.boot_params_dma_handle(); Self::wait_for_gsp_lockdown_release(dev, bar, gsp_falcon, fmc_boot_params_addr)?; @@ -320,18 +316,31 @@ fn wait_for_gsp_lockdown_release( /// Upon return, the GSP is up and running, and its runtime object given as return value. 
pub(crate) fn boot( self: Pin<&mut Self>, - pdev: &pci::Device<device::Bound>, - bar: &Bar0, - chipset: Chipset, - gsp_falcon: &Falcon<Gsp>, - sec2_falcon: &Falcon<Sec2>, + ctx: &mut super::GspBootContext<'_>, ) -> Result { - let dev = pdev.as_ref(); - let uses_sec2 = matches!( - chipset.arch(), - Architecture::Turing | Architecture::Ampere | Architecture::Ada - ); + let bar = ctx.bar; + let chipset = ctx.chipset; + let arch = chipset.arch(); + let pdev = ctx.pdev; + let gsp_falcon = ctx.gsp_falcon; + let sec2_falcon = ctx.sec2_falcon; + + // For FSP-based architectures (Blackwell), refine the vGPU request + // by reading the PRC knob from FSP - only keep the request if the + // hardware knob is set. + // + // SEC2-based architectures (Ada) keep the initial request as-is + // (module parameter + SR-IOV, already filtered by Vgpu::new). + if !arch.uses_sec2_boot() { + let fsp_falcon = Falcon::<FspEngine>::new(ctx.dev(), chipset)?; + Fsp::wait_secure_boot(ctx.dev(), bar, arch)?; + let vgpu_mode = Fsp::read_vgpu_mode(ctx.dev(), bar, &fsp_falcon)?; + dev_dbg!(ctx.dev(), "vGPU mode: {:?}\n", vgpu_mode); + ctx.fsp_falcon = Some(fsp_falcon); + ctx.vgpu_requested &= vgpu_mode == VgpuMode::Enabled; + } + let dev = ctx.dev(); let gsp_fw = KBox::pin_init(GspFirmware::new(dev, chipset, FIRMWARE_VERSION), GFP_KERNEL)?; let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?; @@ -341,7 +350,7 @@ pub(crate) fn boot( io_write!(wpr_meta, , GspFwWprMeta::new(&gsp_fw, &fb_layout)); // Architecture-specific boot path - if uses_sec2 { + if arch.uses_sec2_boot() { // SEC2 path: send commands before GSP reset/boot (original order). self.cmdq .send_command_no_wait(bar, commands::SetSystemInfo::new(pdev, chipset))?; @@ -366,6 +375,7 @@ pub(crate) fn boot( gsp_falcon, &wpr_meta, &self.libos, + ctx.fsp_falcon.as_ref().ok_or(ENODEV)?, )?; } @@ -383,10 +393,7 @@ pub(crate) fn boot( dev_dbg!(dev, "RISC-V active? 
{}\n", gsp_falcon.is_riscv_active(bar)); // For FSP path, send commands after GSP becomes active. - if matches!( - chipset.arch(), - Architecture::Hopper | Architecture::Blackwell - ) { + if !arch.uses_sec2_boot() { self.cmdq .send_command_no_wait(bar, commands::SetSystemInfo::new(pdev, chipset))?; self.cmdq @@ -394,7 +401,7 @@ pub(crate) fn boot( } // SEC2-based architectures need to run the GSP sequencer - if uses_sec2 { + if arch.uses_sec2_boot() { let libos_handle = self.libos.dma_handle(); let seq_params = GspSequencerParams { bootloader_app_version: gsp_fw.bootloader.app_version, diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index c554ec544ddd..bccdbb412dd0 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -27,6 +27,7 @@ mod regs; mod sbuffer; mod vbios; +mod vgpu; pub(crate) const MODULE_NAME: &core::ffi::CStr = <LocalModule as kernel::ModuleMetadata>::NAME; @@ -75,6 +76,20 @@ fn init(module: &'static kernel::ThisModule) -> impl PinInit<Self, Error> { description: "Nova Core GPU driver", license: "GPL v2", firmware: [], + params: { + // vgpu_support = 1 (default): automatic + // + // The driver automatically enables or disables vGPU support based on if the GPU + // advertises SRIOV caps. + // + // vgpu_support = 0: disabled + // + // Explicitly disables vGPU support. The driver will not enable vGPU support regardless. 
+ vgpu_support: u32 { + default: 1, + description: "Enable vGPU support - (1 = auto (default), 0 = disable)", + }, + }, } kernel::module_firmware!(firmware::ModInfoBuilder); diff --git a/drivers/gpu/nova-core/vgpu.rs b/drivers/gpu/nova-core/vgpu.rs new file mode 100644 index 000000000000..d35081a088cf --- /dev/null +++ b/drivers/gpu/nova-core/vgpu.rs @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::{ + device, + pci, + prelude::*, // +}; + +use crate::{ + gpu::Chipset, + module_parameters, // +}; + +pub(crate) struct Vgpu { + pub(crate) vgpu_requested: bool, + pub(crate) vgpu_enabled: bool, + pub total_vfs: u16, +} + +impl Vgpu { + pub(crate) fn new(pdev: &pci::Device<device::Bound>, chipset: Chipset) -> Result<Vgpu> { + let total_vfs = if chipset.arch().supports_vgpu() { + match *module_parameters::vgpu_support.value() { + 0 => 0, + _ => pdev.sriov_get_totalvfs().unwrap_or(0), + } + } else { + 0 + }; + + Ok(Vgpu { + vgpu_requested: total_vfs > 0, + vgpu_enabled: false, + total_vfs, + }) + } +} -- 2.51.0
