If `Gsp::boot` fails, the GSP can be left in a state where boot cannot be attempted again unless it is reset first.
To avoid this, we want to run the unload bundle whenever `boot` fails to try and clear the partially-initialized state. Do this by wrapping the unload bundle into a drop guard up until `boot` returns. After that, running the unload bundle becomes the responsibility of the caller. Signed-off-by: Alexandre Courbot <[email protected]> --- drivers/gpu/nova-core/gsp/boot.rs | 67 ++++++++++++++++++++++++++++++++-- drivers/gpu/nova-core/gsp/hal.rs | 19 +++++----- drivers/gpu/nova-core/gsp/hal/gh100.rs | 15 ++++---- drivers/gpu/nova-core/gsp/hal/tu102.rs | 31 ++++++++++------ 4 files changed, 101 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 8d6fcc35b653..1f83f63ceeb0 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -8,7 +8,8 @@ io::poll::read_poll_timeout, pci, prelude::*, - time::Delta, // + time::Delta, + types::ScopeGuard, // }; use crate::{ @@ -31,6 +32,66 @@ }, }; +/// Arguments required to call [`Gsp::unload`](super::Gsp::unload). +/// +/// Stored as their own type to avoid repeating a long and tedious list in [`BootUnloadGuard`]. +pub(super) struct BootUnloadArgs<'a> { + gsp: &'a super::Gsp, + dev: &'a device::Device<device::Bound>, + bar: &'a Bar0, + gsp_falcon: &'a Falcon<Gsp>, + sec2_falcon: &'a Falcon<Sec2>, + unload_bundle: Option<super::UnloadBundle>, +} + +/// Guard that calls [`Gsp::unload`](super::Gsp::unload) with a +/// [`UnloadBundle`](super::UnloadBundle) when dropped. +/// +/// Used to ensure the `UnloadBundle` is run during failure paths. +pub(super) struct BootUnloadGuard<'a> { + guard: ScopeGuard<BootUnloadArgs<'a>, fn(BootUnloadArgs<'a>)>, +} + +impl<'a> BootUnloadGuard<'a> { + /// Wraps `unload_bundle` into a guard that executes it when dropped. + pub(super) fn new( + gsp: &'a super::Gsp, + dev: &'a device::Device<device::Bound>, + bar: &'a Bar0, + gsp_falcon: &'a Falcon<Gsp>, + sec2_falcon: &'a Falcon<Sec2>, + unload_bundle: Option<super::UnloadBundle>, + ) -> Self { + Self { + guard: ScopeGuard::new_with_data( + BootUnloadArgs { + gsp, + dev, + bar, + gsp_falcon, + sec2_falcon, + unload_bundle, + }, + |args| { + let _ = super::Gsp::unload( + args.gsp, + args.dev, + args.bar, + args.gsp_falcon, + args.sec2_falcon, + args.unload_bundle, + ); + }, + ), + } + } + + /// Disarms the guard and returns the [`UnloadBundle`](super::UnloadBundle) it contains. + pub(super) fn dismiss(self) -> Option<super::UnloadBundle> { + self.guard.dismiss().unload_bundle + } +} + impl super::Gsp { /// Attempt to boot the GSP. /// @@ -59,7 +120,7 @@ pub(crate) fn boot( let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?; // Perform the chipset-specific boot sequence, and retrieve the unload bundle. - let unload_bundle = hal.boot( + let unload_guard = hal.boot( &self, dev, bar, @@ -99,7 +160,7 @@ pub(crate) fn boot( Err(e) => dev_warn!(pdev, "GPU name unavailable: {:?}\n", e), } - Ok(unload_bundle) + Ok(unload_guard.dismiss()) } /// Shut down the GSP and wait until it is offline. diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/hal.rs index 501b852dcb29..88fc3e791114 100644 --- a/drivers/gpu/nova-core/gsp/hal.rs +++ b/drivers/gpu/nova-core/gsp/hal.rs @@ -25,6 +25,7 @@ Chipset, // }, gsp::{ + boot::BootUnloadGuard, Gsp, GspFwWprMeta, // }, @@ -50,20 +51,20 @@ fn run( pub(super) trait GspHal: Send { /// Performs the GSP boot process, loading and running the required firmwares as needed. /// - /// Upon success, returns the [`UnloadBundle`] to be run (if any) in order to properly reset the - /// GSP after it has been stopped. + /// Upon success, returns a guard that runs the GSP unload sequence if GSP boot does not + /// complete. #[allow(clippy::too_many_arguments)] - fn boot( + fn boot<'a>( &self, - gsp: &Gsp, - dev: &device::Device<device::Bound>, - bar: &Bar0, + gsp: &'a Gsp, + dev: &'a device::Device<device::Bound>, + bar: &'a Bar0, chipset: Chipset, fb_layout: &FbLayout, wpr_meta: &Coherent<GspFwWprMeta>, - gsp_falcon: &Falcon<GspEngine>, - sec2_falcon: &Falcon<Sec2>, - ) -> Result<Option<crate::gsp::UnloadBundle>>; + gsp_falcon: &'a Falcon<GspEngine>, + sec2_falcon: &'a Falcon<Sec2>, + ) -> Result<BootUnloadGuard<'a>>; /// Performs HAL-specific post-GSP boot tasks. /// diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core/gsp/hal/gh100.rs index 0a8b7f763883..9a4bb22578b3 100644 --- a/drivers/gpu/nova-core/gsp/hal/gh100.rs +++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs @@ -18,6 +18,7 @@ fb::FbLayout, gpu::Chipset, gsp::{ + boot::BootUnloadGuard, hal::GspHal, Gsp, GspFwWprMeta, // @@ -31,17 +32,17 @@ impl GspHal for Gh100 { /// /// This path uses FSP to establish a chain of trust and boot GSP-FMC. FSP handles /// the GSP boot internally - no manual GSP reset/boot is needed. - fn boot( + fn boot<'a>( &self, - _gsp: &Gsp, - _dev: &device::Device<device::Bound>, - _bar: &Bar0, + _gsp: &'a Gsp, + _dev: &'a device::Device<device::Bound>, + _bar: &'a Bar0, _chipset: Chipset, _fb_layout: &FbLayout, _wpr_meta: &Coherent<GspFwWprMeta>, - _gsp_falcon: &Falcon<GspEngine>, - _sec2_falcon: &Falcon<Sec2>, - ) -> Result<Option<crate::gsp::UnloadBundle>> { + _gsp_falcon: &'a Falcon<GspEngine>, + _sec2_falcon: &'a Falcon<Sec2>, + ) -> Result<BootUnloadGuard<'a>> { Err(ENOTSUPP) } } diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core/gsp/hal/tu102.rs index c4ab081f25c4..6a27e7e90279 100644 --- a/drivers/gpu/nova-core/gsp/hal/tu102.rs +++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs @@ -32,6 +32,7 @@ }, gpu::Chipset, gsp::{ + boot::BootUnloadGuard, hal::{ GspHal, UnloadBundle, // @@ -254,21 +255,23 @@ fn run_fwsec_frts( struct Tu102; impl GspHal for Tu102 { - fn boot( + fn boot<'a>( &self, - gsp: &Gsp, - dev: &device::Device<device::Bound>, - bar: &Bar0, + gsp: &'a Gsp, + dev: &'a device::Device<device::Bound>, + bar: &'a Bar0, chipset: Chipset, fb_layout: &FbLayout, wpr_meta: &Coherent<GspFwWprMeta>, - gsp_falcon: &Falcon<GspEngine>, - sec2_falcon: &Falcon<Sec2>, - ) -> Result<Option<crate::gsp::UnloadBundle>> { + gsp_falcon: &'a Falcon<GspEngine>, + sec2_falcon: &'a Falcon<Sec2>, + ) -> Result<BootUnloadGuard<'a>> { let bios = Vbios::new(dev, bar)?; - // Try and prepare the unload bundle. If this fails, the GPU will need to be reset - // before the driver can be probed again. + // Try and prepare the unload bundle. + // + // If the unload bundle creation fails, the GPU will need to be reset before the driver can + // be probed again. let unload_bundle = Sec2UnloadBundle::build(dev, bar, chipset, &bios, gsp_falcon, sec2_falcon) .inspect_err(|e| { @@ -279,8 +282,12 @@ fn boot( "The GPU will need to be reset before the driver can bind again.\n" ); }) - .map(crate::gsp::UnloadBundle) - .ok(); + .ok() + .map(crate::gsp::UnloadBundle); + + // Wrap the unload bundle into a drop guard so it is automatically run upon failure. + let unload_guard = + BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, unload_bundle); // FWSEC-FRTS is not executed on chips where the FRTS region size is 0 (e.g. GA100). if !fb_layout.frts.is_empty() { @@ -311,7 +318,7 @@ fn boot( )? .run(dev, bar, sec2_falcon, wpr_meta)?; - Ok(unload_bundle) + Ok(unload_guard) } fn post_boot( -- 2.54.0
