From: Timur Tabi <[email protected]> Turing and GA100 use programmed I/O (PIO) instead of DMA to upload firmware images into Falcon memory.
Signed-off-by: Timur Tabi <[email protected]> Co-developed-by: Alexandre Courbot <[email protected]> Signed-off-by: Alexandre Courbot <[email protected]> --- drivers/gpu/nova-core/falcon.rs | 230 +++++++++++++++++++++++++++++++++++- drivers/gpu/nova-core/falcon/hal.rs | 6 +- drivers/gpu/nova-core/regs.rs | 30 +++++ 3 files changed, 263 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 7f52b051206a..f8de36abd135 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -361,6 +361,138 @@ pub(crate) trait FalconDmaLoadable: Deref<Target = DmaObject> { /// Returns the load parameters for `DMEM`. fn dmem_load_params(&self) -> FalconDmaLoadTarget; + + /// Returns an adapter that provides the required parameter to load this firmware using PIO. + /// + /// This can only fail if some `u32` fields cannot be converted to `u16`, or if the indices in + /// the headers are invalid. + fn try_as_pio_loadable(&self) -> Result<FalconDmaFirmwarePioAdapter<'_, Self>> { + let new_pio_imem = |params: FalconDmaLoadTarget, secure| { + // SAFETY: we keep a reference to `self` for as long as this slice is alive, and the + // device will not access this DMA object since we are using PIO. + let data = unsafe { + self.as_slice( + usize::from_safe_cast(params.src_start), + usize::from_safe_cast(params.len), + )? + }; + + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; + + Ok::<_, Error>(FalconPioImemLoadTarget { + data, + dst_start, + secure, + start_tag: dst_start >> 8, + }) + }; + + let imem_sec = new_pio_imem(self.imem_sec_load_params(), true)?; + + let imem_ns = if let Some(params) = self.imem_ns_load_params() { + Some(new_pio_imem(params, false)?) + } else { + None + }; + + let dmem = { + let params = self.dmem_load_params(); + + // SAFETY: we keep a reference to `self` for as long as this slice is alive, and the + // device will not access this DMA object since we are using PIO. + let data = unsafe { + self.as_slice( + usize::from_safe_cast(params.src_start), + usize::from_safe_cast(params.len), + )? + }; + + let dst_start = u16::try_from(params.dst_start).map_err(|_| EINVAL)?; + + FalconPioDmemLoadTarget { data, dst_start } + }; + + Ok(FalconDmaFirmwarePioAdapter { + fw: self, + imem_sec, + imem_ns, + dmem, + }) + } +} + +/// Represents a portion of the firmware to be loaded into IMEM using PIO. +#[derive(Clone)] +pub(crate) struct FalconPioImemLoadTarget<'a> { + pub(crate) data: &'a [u8], + pub(crate) dst_start: u16, + pub(crate) secure: bool, + pub(crate) start_tag: u16, +} + +/// Represents a portion of the firmware to be loaded into DMEM using PIO. +#[derive(Clone)] +pub(crate) struct FalconPioDmemLoadTarget<'a> { + pub(crate) data: &'a [u8], + pub(crate) dst_start: u16, +} + +/// Trait for providing PIO load parameters of falcon firmwares. +pub(crate) trait FalconPioLoadable { + /// Returns the load parameters for Secure `IMEM`, if any. + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>; + + /// Returns the load parameters for Non-Secure `IMEM`, if any. + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>>; + + /// Returns the load parameters for `DMEM`. + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_>; +} + +/// Adapter type that makes any DMA-loadable firmware also loadable via PIO. +/// +/// Created using [`FalconDmaLoadable::try_as_pio_loadable`]. +pub(crate) struct FalconDmaFirmwarePioAdapter<'a, T: FalconDmaLoadable + ?Sized> { + /// Reference to the DMA firmware. + fw: &'a T, + /// Validated secure IMEM parameters. + imem_sec: FalconPioImemLoadTarget<'a>, + /// Validated non-secure IMEM parameters. + imem_ns: Option<FalconPioImemLoadTarget<'a>>, + /// Validated DMEM parameters. + dmem: FalconPioDmemLoadTarget<'a>, +} + +impl<'a, T> FalconPioLoadable for FalconDmaFirmwarePioAdapter<'a, T> +where + T: FalconDmaLoadable + ?Sized, +{ + fn imem_sec_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { + Some(self.imem_sec.clone()) + } + + fn imem_ns_load_params(&self) -> Option<FalconPioImemLoadTarget<'_>> { + self.imem_ns.clone() + } + + fn dmem_load_params(&self) -> FalconPioDmemLoadTarget<'_> { + self.dmem.clone() + } +} + +impl<'a, T> FalconFirmware for FalconDmaFirmwarePioAdapter<'a, T> +where + T: FalconDmaLoadable + FalconFirmware + ?Sized, +{ + type Target = <T as FalconFirmware>::Target; + + fn brom_params(&self) -> FalconBromParams { + self.fw.brom_params() + } + + fn boot_addr(&self) -> u32 { + self.fw.boot_addr() + } } /// Trait for a falcon firmware. @@ -412,6 +544,99 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result { Ok(()) } + /// Falcons supports up to four ports, but we only ever use one, so just hard-code it. + const PIO_PORT: usize = 0; + + /// Write a slice to Falcon IMEM memory using programmed I/O (PIO). + /// + /// Returns `EINVAL` if `img.len()` is not a multiple of 4. + fn pio_wr_imem_slice(&self, bar: &Bar0, load_offsets: FalconPioImemLoadTarget<'_>) -> Result { + // Rejecting misaligned images here allows us to avoid checking + // inside the loops. + if load_offsets.data.len() % 4 != 0 { + return Err(EINVAL); + } + + regs::NV_PFALCON_FALCON_IMEMC::default() + .set_secure(load_offsets.secure) + .set_aincw(true) + .set_offs(load_offsets.dst_start) + .write(bar, &E::ID, Self::PIO_PORT); + + for (n, block) in load_offsets.data.chunks(256).enumerate() { + let n = u16::try_from(n)?; + let tag: u16 = load_offsets.start_tag.checked_add(n).ok_or(ERANGE)?; + regs::NV_PFALCON_FALCON_IMEMT::default().set_tag(tag).write( + bar, + &E::ID, + Self::PIO_PORT, + ); + for word in block.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + regs::NV_PFALCON_FALCON_IMEMD::default() + .set_data(u32::from_le_bytes(w)) + .write(bar, &E::ID, Self::PIO_PORT); + } + } + + Ok(()) + } + + /// Write a slice to Falcon DMEM memory using programmed I/O (PIO). + /// + /// Returns `EINVAL` if `img.len()` is not a multiple of 4. + fn pio_wr_dmem_slice(&self, bar: &Bar0, load_offsets: FalconPioDmemLoadTarget<'_>) -> Result { + // Rejecting misaligned images here allows us to avoid checking + // inside the loops. + if load_offsets.data.len() % 4 != 0 { + return Err(EINVAL); + } + + regs::NV_PFALCON_FALCON_DMEMC::default() + .set_aincw(true) + .set_offs(load_offsets.dst_start) + .write(bar, &E::ID, Self::PIO_PORT); + + for word in load_offsets.data.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + regs::NV_PFALCON_FALCON_DMEMD::default() + .set_data(u32::from_le_bytes(w)) + .write(bar, &E::ID, Self::PIO_PORT); + } + + Ok(()) + } + + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. + pub(crate) fn pio_load<F: FalconFirmware<Target = E> + FalconPioLoadable>( + &self, + bar: &Bar0, + fw: &F, + ) -> Result { + regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID) + .set_allow_phys_no_ctx(true) + .write(bar, &E::ID); + + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + + if let Some(imem_ns) = fw.imem_ns_load_params() { + self.pio_wr_imem_slice(bar, imem_ns)?; + } + if let Some(imem_sec) = fw.imem_sec_load_params() { + self.pio_wr_imem_slice(bar, imem_sec)?; + } + self.pio_wr_dmem_slice(bar, fw.dmem_load_params())?; + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + // Set `BootVec` to start of non-secure code. + regs::NV_PFALCON_FALCON_BOOTVEC::default() + .set_value(fw.boot_addr()) + .write(bar, &E::ID); + + Ok(()) + } + /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's /// `target_mem`. /// @@ -645,7 +870,8 @@ pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool { self.hal.is_riscv_active(bar) } - // Load a firmware image into Falcon memory + /// Load a firmware image into Falcon memory, using the preferred method for the current + /// chipset. pub(crate) fn load<F: FalconFirmware<Target = E> + FalconDmaLoadable>( &self, bar: &Bar0, @@ -653,7 +879,7 @@ pub(crate) fn load<F: FalconFirmware<Target = E> + FalconDmaLoadable>( ) -> Result { match self.hal.load_method() { LoadMethod::Dma => self.dma_load(bar, fw), - LoadMethod::Pio => Err(ENOTSUPP), + LoadMethod::Pio => self.pio_load(bar, &fw.try_as_pio_loadable()?), } } diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs index 89babd5f9325..a7e5ea8d0272 100644 --- a/drivers/gpu/nova-core/falcon/hal.rs +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -58,7 +58,11 @@ fn signature_reg_fuse_version( /// Reset the falcon engine. fn reset_eng(&self, bar: &Bar0) -> Result; - /// returns the method needed to load data into Falcon memory + /// Returns the method used to load data into the falcon's memory. + /// + /// The only chipsets supporting PIO are those < GA102, and PIO is the preferred method for + /// these. For anything above, the PIO registers appear to be masked to the CPU, so DMA is the + /// only usable method. fn load_method(&self) -> LoadMethod; } diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index ea0d32f5396c..53f412f0ca32 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -364,6 +364,36 @@ pub(crate) fn with_falcon_mem(self, mem: FalconMem) -> Self { 1:1 startcpu as bool; }); +// IMEM access control register. Up to 4 ports are available for IMEM access. +register!(NV_PFALCON_FALCON_IMEMC @ PFalconBase[0x00000180[4; 16]] { + 15:0 offs as u16, "IMEM block and word offset"; + 24:24 aincw as bool, "Auto-increment on write"; + 28:28 secure as bool, "Access secure IMEM"; +}); + +// IMEM data register. Reading/writing this register accesses IMEM at the address +// specified by the corresponding IMEMC register. +register!(NV_PFALCON_FALCON_IMEMD @ PFalconBase[0x00000184[4; 16]] { + 31:0 data as u32; +}); + +// IMEM tag register. Used to set the tag for the current IMEM block. +register!(NV_PFALCON_FALCON_IMEMT @ PFalconBase[0x00000188[4; 16]] { + 15:0 tag as u16; +}); + +// DMEM access control register. Up to 8 ports are available for DMEM access. +register!(NV_PFALCON_FALCON_DMEMC @ PFalconBase[0x000001c0[8; 8]] { + 15:0 offs as u16, "DMEM block and word offset"; + 24:24 aincw as bool, "Auto-increment on write"; +}); + +// DMEM data register. Reading/writing this register accesses DMEM at the address +// specified by the corresponding DMEMC register. +register!(NV_PFALCON_FALCON_DMEMD @ PFalconBase[0x000001c4[8; 8]] { + 31:0 data as u32; +}); + // Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon // instance. register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { -- 2.53.0
