When probing the driver, the FWSEC-FRTS firmware creates a WPR2 secure memory region to store the GSP firmware, and the Booter Loader loads and starts that firmware into the GSP, making it run in RISC-V mode.
These operations need to be reverted upon unloading, particularly the WPR2 secure region creation, as its presence prevents the driver from subsequently probing. Thus, prepare the Booter Unloader and FWSEC-SB firmwares when booting the GSP, so they can be executed at unbind time to put the GPU into a state where it can be probed again. Signed-off-by: Alexandre Courbot <[email protected]> --- drivers/gpu/nova-core/firmware/booter.rs | 1 - drivers/gpu/nova-core/firmware/fwsec.rs | 1 - drivers/gpu/nova-core/gpu.rs | 2 +- drivers/gpu/nova-core/gsp.rs | 3 + drivers/gpu/nova-core/gsp/boot.rs | 18 +++- drivers/gpu/nova-core/gsp/hal.rs | 21 ++++- drivers/gpu/nova-core/gsp/hal/gh100.rs | 7 +- drivers/gpu/nova-core/gsp/hal/tu102.rs | 141 ++++++++++++++++++++++++++++++- drivers/gpu/nova-core/regs.rs | 5 ++ 9 files changed, 188 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs index 6a41690e72c6..6f421906c272 100644 --- a/drivers/gpu/nova-core/firmware/booter.rs +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -281,7 +281,6 @@ fn new_booter(data: &[u8]) -> Result<Self> { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) enum BooterKind { Loader, - #[expect(unused)] Unloader, } diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 8810cb49db67..4108f28cd338 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -144,7 +144,6 @@ pub(crate) enum FwsecCommand { /// image into it. Frts { frts_addr: u64, frts_size: u64 }, /// Asks [`FwsecFirmware`] to load pre-OS apps on the PMU. - #[expect(dead_code)] Sb, } diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 75fe1bdb80fe..bcd403781a1a 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -297,7 +297,7 @@ pub(crate) fn new( pub(crate) fn unbind(&self, pdev: &'bound pci::Device<device::Core>) { let _ = self .gsp - .unload(pdev.as_ref(), self.bar, &self.gsp_falcon) + .unload(pdev.as_ref(), self.bar, &self.gsp_falcon, &self.sec2_falcon) .inspect_err(|e| dev_err!(pdev, "failed to unload GSP: {:?}\n", e)); } } diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 38378f104068..58b917172efb 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -126,6 +126,8 @@ pub(crate) struct Gsp { pub(crate) cmdq: Cmdq, /// RM arguments. rmargs: Coherent<GspArgumentsPadded>, + /// Ready-to-run GSP unload bundle, if any. + unload: Option<KBox<dyn hal::UnloadBundle>>, } impl Gsp { @@ -181,6 +183,7 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> impl PinInit<Self, Error dir.read_binary_file(c"logrm", &logs.logrm.0); }) }, + unload: None, })) }) } diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 59c1b4d030ae..61f10c0d2fd2 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -56,8 +56,8 @@ pub(crate) fn boot( let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?; - // Perform the chipset-specific boot sequence. - hal.boot( + // Perform the chipset-specific boot sequence, and retrieve the unload bundle. + let unload_bundle = hal.boot( self.as_mut(), dev, bar, @@ -67,6 +67,7 @@ pub(crate) fn boot( gsp_falcon, sec2_falcon, )?; + *self.as_mut().project().unload = unload_bundle; gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version); @@ -129,6 +130,7 @@ pub(crate) fn unload( dev: &device::Device<device::Bound>, bar: &Bar0, gsp_falcon: &Falcon<Gsp>, + sec2_falcon: &Falcon<Sec2>, ) -> Result { // Shut down the GSP. Self::shutdown_gsp( @@ -139,6 +141,18 @@ pub(crate) fn unload( ) .inspect_err(|e| dev_err!(dev, "Unload guest driver failed: {:?}\n", e))?; + // With the GSP shut down, reset the GSP so it can be restarted. + if let Some(unload_bundle) = self.unload.as_ref() { + unload_bundle.run(dev, bar, gsp_falcon, sec2_falcon)?; + } else { + dev_warn!( + dev, + "Unload bundle is missing, GSP won't be properly reset.\n" + ); + } + + dev_info!(dev, "GSP successfully unloaded\n"); + Ok(()) } } diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/hal.rs index 4d8c1998f4cf..d9c324226dda 100644 --- a/drivers/gpu/nova-core/gsp/hal.rs +++ b/drivers/gpu/nova-core/gsp/hal.rs @@ -29,9 +29,28 @@ }, }; +/// Trait for types containing the resources and code required to fully reset the GSP. +/// +/// The GSP unload code might run in a situation where we cannot load firmware dynamically (e.g. +/// because we are in shutdown and the file system is not accessible anymore). Thus, the firmware +/// required for unloading is prepared at load time, and stored here until it needs to be run. +pub(super) trait UnloadBundle: Send { + /// Performs the steps required to properly reset the GSP after it has been stopped. + fn run( + &self, + dev: &device::Device<device::Bound>, + bar: &Bar0, + gsp_falcon: &Falcon<GspEngine>, + sec2_falcon: &Falcon<Sec2>, + ) -> Result; +} + /// Trait implemented by GSP HALs. pub(super) trait GspHal: Send { /// Performs the GSP boot process, loading and running the required firmwares as needed. + /// + /// Upon success, returns the [`UnloadBundle`] to be run (if any) in order to properly reset the + /// GSP after it has been stopped. #[allow(clippy::too_many_arguments)] fn boot( &self, @@ -43,7 +62,7 @@ fn boot( wpr_meta: &Coherent<GspFwWprMeta>, gsp_falcon: &Falcon<GspEngine>, sec2_falcon: &Falcon<Sec2>, - ) -> Result; + ) -> Result<Option<KBox<dyn UnloadBundle>>>; /// Performs HAL-specific post-GSP boot tasks. /// diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core/gsp/hal/gh100.rs index 9d7e9f4454b1..51e1099cda0d 100644 --- a/drivers/gpu/nova-core/gsp/hal/gh100.rs +++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs @@ -17,7 +17,10 @@ fb::FbLayout, gpu::Chipset, gsp::{ - hal::GspHal, + hal::{ + GspHal, + UnloadBundle, // + }, Gsp, GspFwWprMeta, // }, @@ -40,7 +43,7 @@ fn boot( _wpr_meta: &Coherent<GspFwWprMeta>, _gsp_falcon: &Falcon<GspEngine>, _sec2_falcon: &Falcon<Sec2>, - ) -> Result { + ) -> Result<Option<KBox<dyn UnloadBundle>>> { Err(ENOTSUPP) } } diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core/gsp/hal/tu102.rs index d5da23cd8c90..c6c947696ba8 100644 --- a/drivers/gpu/nova-core/gsp/hal/tu102.rs +++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs @@ -31,7 +31,10 @@ }, gpu::Chipset, gsp::{ - hal::GspHal, + hal::{ + GspHal, + UnloadBundle, // + }, sequencer::{ GspSequencer, GspSequencerParams, // @@ -43,6 +46,124 @@ vbios::Vbios, // }; +// A ready-to-run FWSEC unload firmware. +// +// Since there are two variants of the prepared firmware (with and without a bootloader), this type +// abstracts the difference. +enum FwsecUnloadFirmware { + WithoutBl(FwsecFirmware), + WithBl(FwsecFirmwareWithBl), +} + +impl FwsecUnloadFirmware { + /// Loads the FWSEC SB firmware, as well as its bootloader if `chipset` requires it. + fn new( + dev: &device::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + bios: &Vbios, + gsp_falcon: &Falcon<GspEngine>, + ) -> Result<Self> { + let fwsec_sb = FwsecFirmware::new(dev, gsp_falcon, bar, bios, FwsecCommand::Sb)?; + + Ok(if chipset.needs_fwsec_bootloader() { + Self::WithBl(FwsecFirmwareWithBl::new(fwsec_sb, dev, chipset)?) + } else { + Self::WithoutBl(fwsec_sb) + }) + } + + /// Runs the FWSEC SB firmware. + fn run( + &self, + dev: &device::Device<device::Bound>, + bar: &Bar0, + gsp_falcon: &Falcon<GspEngine>, + ) -> Result<()> { + match self { + Self::WithoutBl(fw) => fw.run(dev, gsp_falcon, bar), + Self::WithBl(fw) => fw.run(dev, gsp_falcon, bar), + } + } +} + +// Contains the firmware required to fully reset GSP on chipsets where the GSP is started using +// FWSEC/Booter. +pub(super) struct Sec2UnloadBundle { + fwsec_sb: FwsecUnloadFirmware, + booter_unloader: BooterFirmware, +} + +impl Sec2UnloadBundle { + /// Load and prepare the resources required to properly reset the GSP after it has been stopped. + fn build( + dev: &device::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + bios: &Vbios, + gsp_falcon: &Falcon<GspEngine>, + sec2_falcon: &Falcon<Sec2>, + ) -> Result<KBox<dyn UnloadBundle>> { + KBox::new( + Self { + fwsec_sb: FwsecUnloadFirmware::new(dev, bar, chipset, bios, gsp_falcon)?, + booter_unloader: BooterFirmware::new( + dev, + BooterKind::Unloader, + chipset, + FIRMWARE_VERSION, + sec2_falcon, + bar, + )?, + }, + GFP_KERNEL, + ) + .map(|b| b as KBox<dyn UnloadBundle>) + .map_err(Into::into) + } +} + +impl UnloadBundle for Sec2UnloadBundle { + fn run( + &self, + dev: &device::Device<device::Bound>, + bar: &Bar0, + gsp_falcon: &Falcon<GspEngine>, + sec2_falcon: &Falcon<Sec2>, + ) -> Result<()> { + // Run FWSEC-SB to reset the GSP falcon to its pre-libos state. + self.fwsec_sb.run(dev, bar, gsp_falcon)?; + + // Remove WPR2 region if set. + let wpr2_hi = bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI); + if wpr2_hi.is_wpr2_set() { + sec2_falcon.reset(bar)?; + sec2_falcon.load(dev, bar, &self.booter_unloader)?; + + // Sentinel value to confirm that Booter Unloader has run. + const MAILBOX_SENTINEL: u32 = 0xff; + let (mbox0, _) = + sec2_falcon.boot(bar, Some(MAILBOX_SENTINEL), Some(MAILBOX_SENTINEL))?; + if mbox0 != 0 { + dev_err!(dev, "Booter Unloader returned error 0x{:x}\n", mbox0); + return Err(EINVAL); + } + + // Confirm that the WPR2 region has been removed. + let wpr2_hi = bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI); + if wpr2_hi.is_wpr2_set() { + dev_err!( + dev, + "WPR2 region still set after Booter Unloader returned\n" + ); + return Err(EBUSY); + } + } + + Ok(()) + } +} + /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly /// created the WPR2 region. fn run_fwsec_frts( @@ -161,7 +282,7 @@ fn boot( wpr_meta: &Coherent<GspFwWprMeta>, gsp_falcon: &Falcon<GspEngine>, sec2_falcon: &Falcon<Sec2>, - ) -> Result { + ) -> Result<Option<KBox<dyn UnloadBundle>>> { let bios = Vbios::new(dev, bar)?; // FWSEC-FRTS is not executed on chips where the FRTS region size is 0 (e.g. GA100). @@ -185,7 +306,21 @@ fn boot( run_booter(dev, bar, chipset, sec2_falcon, wpr_meta)?; - Ok(()) + // Last, try and prepare the unload bundle. If this fails, the GPU will need to be reset + // before the driver can be probed again. + let unload_bundle = + Sec2UnloadBundle::build(dev, bar, chipset, &bios, gsp_falcon, sec2_falcon) + .inspect_err(|e| { + dev_warn!(dev, "Failed to prepare unload firmware: {:?}\n", e); + dev_warn!(dev, "The GSP won't be able to unload properly on unbind.\n"); + dev_warn!( + dev, + "The GPU will need to be reset before the driver can bind again.\n" + ); + }) + .ok(); + + Ok(unload_bundle) } fn post_boot( diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 6faeed73901d..356fbf364ea5 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -175,6 +175,11 @@ impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { pub(crate) fn higher_bound(self) -> u64 { u64::from(self.hi_val()) << 12 } + + /// Returns whether the WPR2 region is currently set. + pub(crate) fn is_wpr2_set(self) -> bool { + self.hi_val() != 0 + } } // PGSP -- 2.54.0
