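Turing and GA100 use programmed I/O (PIO) instead of DMA to upload firmware images into Falcon memory. Add a PIO load path to the Falcon code, and a `load()` helper that picks PIO or DMA based on the HAL's `load_method()`; switch the FWSEC and booter loader call sites from `dma_load()` to `load()`.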
Signed-off-by: Timur Tabi <[email protected]> --- drivers/gpu/nova-core/falcon.rs | 135 +++++++++++++++++++++++- drivers/gpu/nova-core/falcon/hal.rs | 2 - drivers/gpu/nova-core/firmware/fwsec.rs | 2 +- drivers/gpu/nova-core/gsp/boot.rs | 2 +- drivers/gpu/nova-core/regs.rs | 30 ++++++ 5 files changed, 166 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 344354ed50b8..4f3a3b002725 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -23,6 +23,7 @@ use crate::{ dma::DmaObject, driver::Bar0, + falcon::hal::LoadMethod, gpu::Chipset, num::{ FromSafeCast, @@ -242,7 +243,6 @@ pub(crate) enum FalconMem { /// Secure Instruction Memory. ImemSecure, /// Non-Secure Instruction Memory. - #[expect(unused)] ImemNonSecure, /// Data Memory. Dmem, @@ -410,6 +410,131 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result { Ok(()) } + /// Write a slice to Falcon memory using programmed I/O (PIO). + /// + /// Writes `img` to the specified `target_mem` (IMEM or DMEM) starting at `mem_base`. + /// For IMEM writes, tags are set for each 256-byte block starting from `start_tag`. + /// For DMEM, start_tag is ignored. + /// + /// Returns `EINVAL` if `img.len()` is not a multiple of 4. + fn pio_wr_slice( + &self, + bar: &Bar0, + img: &[u8], + mem_base: u16, + target_mem: FalconMem, + start_tag: u16, + ) -> Result { + // Rejecting misaligned images here allows us to avoid checking + // inside the loops. + if img.len() % 4 != 0 { + return Err(EINVAL); + } + + // NV_PFALCON_FALCON_IMEMC supports up to four ports, + // but we only ever use one, so just hard-code it. 
+ const PORT: usize = 0; + + match target_mem { + FalconMem::ImemSecure | FalconMem::ImemNonSecure => { + regs::NV_PFALCON_FALCON_IMEMC::default() + .set_secure(target_mem == FalconMem::ImemSecure) + .set_aincw(true) + .set_offs(mem_base) + .write(bar, &E::ID, PORT); + + for (n, block) in img.chunks(256).enumerate() { + let n = u16::try_from(n)?; + let tag: u16 = start_tag.checked_add(n).ok_or(ERANGE)?; + regs::NV_PFALCON_FALCON_IMEMT::default() + .set_tag(tag) + .write(bar, &E::ID, PORT); + for word in block.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + regs::NV_PFALCON_FALCON_IMEMD::default() + .set_data(u32::from_le_bytes(w)) + .write(bar, &E::ID, PORT); + } + } + } + FalconMem::Dmem => { + regs::NV_PFALCON_FALCON_DMEMC::default() + .set_aincw(true) + .set_offs(mem_base) + .write(bar, &E::ID, PORT); + + for word in img.chunks_exact(4) { + let w = [word[0], word[1], word[2], word[3]]; + regs::NV_PFALCON_FALCON_DMEMD::default() + .set_data(u32::from_le_bytes(w)) + .write(bar, &E::ID, PORT); + } + } + } + + Ok(()) + } + + /// Perform a PIO write of a firmware section to falcon memory. + /// + /// Extracts the data slice specified by `load_offsets` from `fw` and writes it to + /// `target_mem` using the given `start_tag`. + fn pio_wr<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + target_mem: FalconMem, + load_offsets: &FalconLoadTarget, + start_tag: u16, + ) -> Result { + let start = usize::from_safe_cast(load_offsets.src_start); + let len = usize::from_safe_cast(load_offsets.len); + let mem_base = u16::try_from(load_offsets.dst_start)?; + + // SAFETY: we are the only user of the firmware image at this stage + let data = unsafe { fw.as_slice(start, len).map_err(|_| EINVAL)? }; + + self.pio_wr_slice(bar, data, mem_base, target_mem, start_tag) + } + + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. 
+ pub(crate) fn pio_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { + let imem_sec = fw.imem_sec_load_params(); + let imem_ns = fw.imem_ns_load_params().ok_or(EINVAL)?; + let dmem = fw.dmem_load_params(); + + regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID) + .set_allow_phys_no_ctx(true) + .write(bar, &E::ID); + + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + + self.pio_wr( + bar, + fw, + FalconMem::ImemNonSecure, + &imem_ns, + u16::try_from(imem_ns.dst_start >> 8)?, + )?; + self.pio_wr( + bar, + fw, + FalconMem::ImemSecure, + &imem_sec, + u16::try_from(imem_sec.dst_start >> 8)?, + )?; + self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0)?; + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + // Set `BootVec` to start of non-secure code. + regs::NV_PFALCON_FALCON_BOOTVEC::default() + .set_value(fw.boot_addr()) + .write(bar, &E::ID); + + Ok(()) + } + /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's /// `target_mem`. /// @@ -638,6 +763,14 @@ pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool { self.hal.is_riscv_active(bar) } + /// Load a firmware image into Falcon memory via the HAL-selected method (PIO or DMA). + pub(crate) fn load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { + match self.hal.load_method() { + LoadMethod::Pio => self.pio_load(bar, fw), + LoadMethod::Dma => self.dma_load(bar, fw), + } + } + /// Write the application version to the OS register. pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) { regs::NV_PFALCON_FALCON_OS::default() diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs index fe6de900e8b0..89babd5f9325 100644 --- a/drivers/gpu/nova-core/falcon/hal.rs +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -17,7 +17,6 @@ /// Method used to load data into falcon memory. Some GPU architectures need /// PIO and others can use DMA. 
-#[expect(unused)] pub(crate) enum LoadMethod { /// Programmed I/O Pio, @@ -60,7 +59,6 @@ fn signature_reg_fuse_version( fn reset_eng(&self, bar: &Bar0) -> Result; /// returns the method needed to load data into Falcon memory - #[expect(unused)] fn load_method(&self) -> LoadMethod; } diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 89dc4526041b..a8ec08a500ac 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -420,7 +420,7 @@ pub(crate) fn run( .reset(bar) .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; falcon - .dma_load(bar, self) + .load(bar, self) .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; let (mbox0, _) = falcon .boot(bar, Some(0), None) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index 581b412554dc..be427fe26a58 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -183,7 +183,7 @@ pub(crate) fn boot( ); sec2_falcon.reset(bar)?; - sec2_falcon.dma_load(bar, &booter_loader)?; + sec2_falcon.load(bar, &booter_loader)?; let wpr_handle = wpr_meta.dma_handle(); let (mbox0, mbox1) = sec2_falcon.boot( bar, diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index ea0d32f5396c..53f412f0ca32 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -364,6 +364,36 @@ pub(crate) fn with_falcon_mem(self, mem: FalconMem) -> Self { 1:1 startcpu as bool; }); +// IMEM access control register. Up to 4 ports are available for IMEM access. +register!(NV_PFALCON_FALCON_IMEMC @ PFalconBase[0x00000180[4; 16]] { + 15:0 offs as u16, "IMEM block and word offset"; + 24:24 aincw as bool, "Auto-increment on write"; + 28:28 secure as bool, "Access secure IMEM"; +}); + +// IMEM data register. Reading/writing this register accesses IMEM at the address +// specified by the corresponding IMEMC register. 
+register!(NV_PFALCON_FALCON_IMEMD @ PFalconBase[0x00000184[4; 16]] { + 31:0 data as u32; +}); + +// IMEM tag register. Used to set the tag for the current IMEM block. +register!(NV_PFALCON_FALCON_IMEMT @ PFalconBase[0x00000188[4; 16]] { + 15:0 tag as u16; +}); + +// DMEM access control register. Up to 8 ports are available for DMEM access. +register!(NV_PFALCON_FALCON_DMEMC @ PFalconBase[0x000001c0[8; 8]] { + 15:0 offs as u16, "DMEM block and word offset"; + 24:24 aincw as bool, "Auto-increment on write"; +}); + +// DMEM data register. Reading/writing this register accesses DMEM at the address +// specified by the corresponding DMEMC register. +register!(NV_PFALCON_FALCON_DMEMD @ PFalconBase[0x000001c4[8; 8]] { + 31:0 data as u32; +}); + // Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon // instance. register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { -- 2.52.0
