Turing and GA100 use programmed I/O (PIO) instead of DMA to upload
firmware images into Falcon memory.

Signed-off-by: Timur Tabi <[email protected]>
---
 drivers/gpu/nova-core/falcon.rs         | 135 +++++++++++++++++++++++-
 drivers/gpu/nova-core/falcon/hal.rs     |   2 -
 drivers/gpu/nova-core/firmware/fwsec.rs |   2 +-
 drivers/gpu/nova-core/gsp/boot.rs       |   2 +-
 drivers/gpu/nova-core/regs.rs           |  30 ++++++
 5 files changed, 166 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index 344354ed50b8..4f3a3b002725 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -23,6 +23,7 @@
 use crate::{
     dma::DmaObject,
     driver::Bar0,
+    falcon::hal::LoadMethod,
     gpu::Chipset,
     num::{
         FromSafeCast,
@@ -242,7 +243,6 @@ pub(crate) enum FalconMem {
     /// Secure Instruction Memory.
     ImemSecure,
     /// Non-Secure Instruction Memory.
-    #[expect(unused)]
     ImemNonSecure,
     /// Data Memory.
     Dmem,
@@ -410,6 +410,131 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
         Ok(())
     }
 
+    /// Write a slice to Falcon memory using programmed I/O (PIO).
+    ///
+    /// Writes `img` to the specified `target_mem` (IMEM or DMEM) starting at 
`mem_base`.
+    /// For IMEM writes, tags are set for each 256-byte block starting from 
`start_tag`.
+    /// For DMEM, start_tag is ignored.
+    ///
+    /// Returns `EINVAL` if `img.len()` is not a multiple of 4.
+    fn pio_wr_slice(
+        &self,
+        bar: &Bar0,
+        img: &[u8],
+        mem_base: u16,
+        target_mem: FalconMem,
+        start_tag: u16,
+    ) -> Result {
+        // Rejecting misaligned images here allows us to avoid checking
+        // inside the loops.
+        if img.len() % 4 != 0 {
+            return Err(EINVAL);
+        }
+
+        // NV_PFALCON_FALCON_IMEMC supports up to four ports,
+        // but we only ever use one, so just hard-code it.
+        const PORT: usize = 0;
+
+        match target_mem {
+            FalconMem::ImemSecure | FalconMem::ImemNonSecure => {
+                regs::NV_PFALCON_FALCON_IMEMC::default()
+                    .set_secure(target_mem == FalconMem::ImemSecure)
+                    .set_aincw(true)
+                    .set_offs(mem_base)
+                    .write(bar, &E::ID, PORT);
+
+                for (n, block) in img.chunks(256).enumerate() {
+                    let n = u16::try_from(n)?;
+                    let tag: u16 = start_tag.checked_add(n).ok_or(ERANGE)?;
+                    regs::NV_PFALCON_FALCON_IMEMT::default()
+                        .set_tag(tag)
+                        .write(bar, &E::ID, PORT);
+                    for word in block.chunks_exact(4) {
+                        let w = [word[0], word[1], word[2], word[3]];
+                        regs::NV_PFALCON_FALCON_IMEMD::default()
+                            .set_data(u32::from_le_bytes(w))
+                            .write(bar, &E::ID, PORT);
+                    }
+                }
+            }
+            FalconMem::Dmem => {
+                regs::NV_PFALCON_FALCON_DMEMC::default()
+                    .set_aincw(true)
+                    .set_offs(mem_base)
+                    .write(bar, &E::ID, PORT);
+
+                for word in img.chunks_exact(4) {
+                    let w = [word[0], word[1], word[2], word[3]];
+                    regs::NV_PFALCON_FALCON_DMEMD::default()
+                        .set_data(u32::from_le_bytes(w))
+                        .write(bar, &E::ID, PORT);
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Perform a PIO write of a firmware section to falcon memory.
+    ///
+    /// Extracts the data slice specified by `load_offsets` from `fw` and 
writes it to
+    /// `target_mem` using the given port and tag.
+    fn pio_wr<F: FalconFirmware<Target = E>>(
+        &self,
+        bar: &Bar0,
+        fw: &F,
+        target_mem: FalconMem,
+        load_offsets: &FalconLoadTarget,
+        start_tag: u16,
+    ) -> Result {
+        let start = usize::from_safe_cast(load_offsets.src_start);
+        let len = usize::from_safe_cast(load_offsets.len);
+        let mem_base = u16::try_from(load_offsets.dst_start)?;
+
+        // SAFETY: we are the only user of the firmware image at this stage
+        let data = unsafe { fw.as_slice(start, len).map_err(|_| EINVAL)? };
+
+        self.pio_wr_slice(bar, data, mem_base, target_mem, start_tag)
+    }
+
+    /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the 
falcon to run it.
+    pub(crate) fn pio_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, 
fw: &F) -> Result {
+        let imem_sec = fw.imem_sec_load_params();
+        let imem_ns = fw.imem_ns_load_params().ok_or(EINVAL)?;
+        let dmem = fw.dmem_load_params();
+
+        regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID)
+            .set_allow_phys_no_ctx(true)
+            .write(bar, &E::ID);
+
+        regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID);
+
+        self.pio_wr(
+            bar,
+            fw,
+            FalconMem::ImemNonSecure,
+            &imem_ns,
+            u16::try_from(imem_ns.dst_start >> 8)?,
+        )?;
+        self.pio_wr(
+            bar,
+            fw,
+            FalconMem::ImemSecure,
+            &imem_sec,
+            u16::try_from(imem_sec.dst_start >> 8)?,
+        )?;
+        self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0)?;
+
+        self.hal.program_brom(self, bar, &fw.brom_params())?;
+
+        // Set `BootVec` to start of non-secure code.
+        regs::NV_PFALCON_FALCON_BOOTVEC::default()
+            .set_value(fw.boot_addr())
+            .write(bar, &E::ID);
+
+        Ok(())
+    }
+
     /// Perform a DMA write according to `load_offsets` from `dma_handle` into 
the falcon's
     /// `target_mem`.
     ///
@@ -638,6 +763,14 @@ pub(crate) fn is_riscv_active(&self, bar: &Bar0) -> bool {
         self.hal.is_riscv_active(bar)
     }
 
+    // Load a firmware image into Falcon memory
+    pub(crate) fn load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: 
&F) -> Result {
+        match self.hal.load_method() {
+            LoadMethod::Pio => self.pio_load(bar, fw),
+            LoadMethod::Dma => self.dma_load(bar, fw),
+        }
+    }
+
     /// Write the application version to the OS register.
     pub(crate) fn write_os_version(&self, bar: &Bar0, app_version: u32) {
         regs::NV_PFALCON_FALCON_OS::default()
diff --git a/drivers/gpu/nova-core/falcon/hal.rs 
b/drivers/gpu/nova-core/falcon/hal.rs
index fe6de900e8b0..89babd5f9325 100644
--- a/drivers/gpu/nova-core/falcon/hal.rs
+++ b/drivers/gpu/nova-core/falcon/hal.rs
@@ -17,7 +17,6 @@
 
 /// Method used to load data into falcon memory. Some GPU architectures need
 /// PIO and others can use DMA.
-#[expect(unused)]
 pub(crate) enum LoadMethod {
     /// Programmed I/O
     Pio,
@@ -60,7 +59,6 @@ fn signature_reg_fuse_version(
     fn reset_eng(&self, bar: &Bar0) -> Result;
 
     /// returns the method needed to load data into Falcon memory
-    #[expect(unused)]
     fn load_method(&self) -> LoadMethod;
 }
 
diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs 
b/drivers/gpu/nova-core/firmware/fwsec.rs
index 89dc4526041b..a8ec08a500ac 100644
--- a/drivers/gpu/nova-core/firmware/fwsec.rs
+++ b/drivers/gpu/nova-core/firmware/fwsec.rs
@@ -420,7 +420,7 @@ pub(crate) fn run(
             .reset(bar)
             .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: 
{:?}\n", e))?;
         falcon
-            .dma_load(bar, self)
+            .load(bar, self)
             .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: 
{:?}\n", e))?;
         let (mbox0, _) = falcon
             .boot(bar, Some(0), None)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs 
b/drivers/gpu/nova-core/gsp/boot.rs
index 581b412554dc..be427fe26a58 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -183,7 +183,7 @@ pub(crate) fn boot(
         );
 
         sec2_falcon.reset(bar)?;
-        sec2_falcon.dma_load(bar, &booter_loader)?;
+        sec2_falcon.load(bar, &booter_loader)?;
         let wpr_handle = wpr_meta.dma_handle();
         let (mbox0, mbox1) = sec2_falcon.boot(
             bar,
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index ea0d32f5396c..53f412f0ca32 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -364,6 +364,36 @@ pub(crate) fn with_falcon_mem(self, mem: FalconMem) -> 
Self {
     1:1     startcpu as bool;
 });
 
+// IMEM access control register. Up to 4 ports are available for IMEM access.
+register!(NV_PFALCON_FALCON_IMEMC @ PFalconBase[0x00000180[4; 16]] {
+    15:0      offs as u16, "IMEM block and word offset";
+    24:24     aincw as bool, "Auto-increment on write";
+    28:28     secure as bool, "Access secure IMEM";
+});
+
+// IMEM data register. Reading/writing this register accesses IMEM at the 
address
+// specified by the corresponding IMEMC register.
+register!(NV_PFALCON_FALCON_IMEMD @ PFalconBase[0x00000184[4; 16]] {
+    31:0      data as u32;
+});
+
+// IMEM tag register. Used to set the tag for the current IMEM block.
+register!(NV_PFALCON_FALCON_IMEMT @ PFalconBase[0x00000188[4; 16]] {
+    15:0      tag as u16;
+});
+
+// DMEM access control register. Up to 8 ports are available for DMEM access.
+register!(NV_PFALCON_FALCON_DMEMC @ PFalconBase[0x000001c0[8; 8]] {
+    15:0      offs as u16, "DMEM block and word offset";
+    24:24     aincw as bool, "Auto-increment on write";
+});
+
+// DMEM data register. Reading/writing this register accesses DMEM at the 
address
+// specified by the corresponding DMEMC register.
+register!(NV_PFALCON_FALCON_DMEMD @ PFalconBase[0x000001c4[8; 8]] {
+    31:0      data as u32;
+});
+
 // Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` 
depending on the falcon
 // instance.
 register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] {
-- 
2.52.0

Reply via email to