On Fri, Nov 14, 2025 at 05:30:44PM -0600, Timur Tabi wrote:
> Turing and GA100 use programmed I/O (PIO) instead of DMA to upload
> firmware images into Falcon memory.
>
> A new firmware called the Generic Bootloader (as opposed to the
> GSP Bootloader) is used to upload FWSEC.
>
> Signed-off-by: Timur Tabi <[email protected]>
> ---
> drivers/gpu/nova-core/falcon.rs | 181 ++++++++++++++++++++++++
> drivers/gpu/nova-core/firmware.rs | 4 +-
> drivers/gpu/nova-core/firmware/fwsec.rs | 112 ++++++++++++++-
> drivers/gpu/nova-core/gsp/boot.rs | 10 +-
> 4 files changed, 299 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
> index 7af32f65ba5f..f9a4a35b7569 100644
> --- a/drivers/gpu/nova-core/falcon.rs
> +++ b/drivers/gpu/nova-core/falcon.rs
> @@ -20,6 +20,10 @@
> use crate::{
> dma::DmaObject,
> driver::Bar0,
> + firmware::fwsec::{
> + BootloaderDmemDescV2,
> + GenericBootloader, //
> + },
> gpu::Chipset,
> num::{
> FromSafeCast,
> @@ -400,6 +404,183 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
> Ok(())
> }
>
> +
> + /// See nvkm_falcon_pio_wr - takes a byte array instead of a
> FalconFirmware
> + fn pio_wr_bytes(
> + &self,
> + bar: &Bar0,
> + source: *const u8,
> + mem_base: u16,
> + length: usize,
> + target_mem: FalconMem,
> + port: u8,
> + tag: u16
Please don't use a raw pointer for source; use a slice instead. Then you don't
need to assume the length is a multiple of 4: you can just return an error if
it isn't. Something like:
fn pio_wr_bytes(
&self,
bar: &Bar0,
data: &[u8],
mem_base: u16,
target_mem: FalconMem,
port: u8,
tag: u16
) -> Result {
> + ) -> Result {
> + // To avoid unnecessary complication in the write loop, make sure
> the buffer
> + // length is aligned. It always is, which is why an assertion is
> okay.
> + assert!((length % 4) == 0);
You can then get rid of this assertion and just return an error if the length is not a multiple of 4.
> +
> + // From now on, we treat the data as an array of u32
> +
> + let length = length / 4;
> + let mut remaining_len: usize = length;
> + let mut img_offset: usize = 0;
> + let mut tag = tag;
> +
> + // Get data as a slice of u32s
> + let img = unsafe {
Missing `// SAFETY:` comment on this unsafe block. Please go over the coding
guidelines and format the comments accordingly.
> + core::slice::from_raw_parts(source as *const u32, length)
> + };
> +
> + match target_mem {
> + FalconMem::ImemSec | FalconMem::ImemNs => {
> + regs::NV_PFALCON_FALCON_IMEMC::default()
> + .set_secure(target_mem == FalconMem::ImemSec)
> + .set_aincw(true)
> + .set_offs(mem_base)
> + .write(bar, &E::ID, port as usize);
> + },
> + FalconMem::Dmem => {
> + // gm200_flcn_pio_dmem_wr_init
Misplaced comment?
> + regs::NV_PFALCON_FALCON_DMEMC::default()
> + .set_aincw(true)
> + .set_offs(mem_base)
> + .write(bar, &E::ID, port as usize);
> + },
> + }
> +
> + while remaining_len > 0 {
> + let xfer_len = core::cmp::min(remaining_len, 256 / 4); //
> pio->max = 256
> +
> + // Perform the PIO write for the next 256 bytes. Each tag
> represents
> + // a 256-byte block in IMEM/DMEM.
> + let mut len = xfer_len;
> +
> + match target_mem {
> + FalconMem::ImemSec | FalconMem::ImemNs => {
> + regs::NV_PFALCON_FALCON_IMEMT::default()
> + .set_tag(tag)
> + .write(bar, &E::ID, port as usize);
> +
> + while len > 0 {
> + regs::NV_PFALCON_FALCON_IMEMD::default()
> + .set_data(img[img_offset])
> + .write(bar, &E::ID, port as usize);
> + img_offset += 1;
> + len -= 1;
> + };
> +
> + tag += 1;
> + },
> + FalconMem::Dmem => {
> + // tag is ignored for DMEM
> + while len > 0 {
> + regs::NV_PFALCON_FALCON_DMEMD::default()
> + .set_data(img[img_offset])
> + .write(bar, &E::ID, port as usize);
> + img_offset += 1;
> + len -= 1;
> + };
> + },
> + }
> +
> + remaining_len -= xfer_len;
> + }
> +
> + Ok(())
> + }
> +
> + /// See nvkm_falcon_pio_wr
> + fn pio_wr<F: FalconFirmware<Target = E>>(
> + &self,
> + bar: &Bar0,
> + fw: &F,
> + target_mem: FalconMem,
> + load_offsets: &FalconLoadTarget,
> + port: u8,
> + tag: u16,
> + ) -> Result {
> + // FIXME: There's probably a better way to create a pointer to
> inside the firmware
> + // Maybe CoherentAllocation needs to implement a method for that.
> + let start = unsafe { fw.start_ptr().add(load_offsets.src_start as
> usize) };
> + self.pio_wr_bytes(bar, start,
> + load_offsets.dst_start as u16,
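Lossy conversions require comments. 'as' silently truncates if the value does not fit in the target type, so either justify it in a comment or use a checked conversion (e.g. u16::try_from).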
thanks,
- Joel
[...]