Turing and GA100 use programmed I/O (PIO) instead of DMA to upload
firmware images into Falcon memory.

A new firmware called the Generic Bootloader (as opposed to the
GSP Bootloader) is used to upload FWSEC.

Signed-off-by: Timur Tabi <[email protected]>
---
 drivers/gpu/nova-core/falcon.rs         | 181 ++++++++++++++++++++++++
 drivers/gpu/nova-core/firmware.rs       |   4 +-
 drivers/gpu/nova-core/firmware/fwsec.rs | 112 ++++++++++++++-
 drivers/gpu/nova-core/gsp/boot.rs       |  10 +-
 4 files changed, 299 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index 7af32f65ba5f..f9a4a35b7569 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -20,6 +20,10 @@
 use crate::{
     dma::DmaObject,
     driver::Bar0,
+    firmware::fwsec::{
+        BootloaderDmemDescV2,
+        GenericBootloader, //
+    },
     gpu::Chipset,
     num::{
         FromSafeCast,
@@ -400,6 +404,183 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
         Ok(())
     }
 
+
+    /// See nvkm_falcon_pio_wr - takes a byte array instead of a FalconFirmware
+    fn pio_wr_bytes(
+        &self,
+        bar: &Bar0,
+        source: *const u8,
+        mem_base: u16,
+        length: usize,
+        target_mem: FalconMem,
+        port: u8,
+        tag: u16
+    ) -> Result {
+        // To avoid unnecessary complication in the write loop, make sure the 
buffer
+        // length is aligned.  It always is, which is why an assertion is okay.
+        assert!((length % 4) == 0);
+
+        // From now on, we treat the data as an array of u32
+
+        let length = length / 4;
+        let mut remaining_len: usize = length;
+        let mut img_offset: usize = 0;
+        let mut tag = tag;
+
+        // Get data as a slice of u32s
+        let img = unsafe {
+            core::slice::from_raw_parts(source as *const u32, length)
+        };
+
+        match target_mem {
+            FalconMem::ImemSec | FalconMem::ImemNs => {
+                regs::NV_PFALCON_FALCON_IMEMC::default()
+                    .set_secure(target_mem == FalconMem::ImemSec)
+                    .set_aincw(true)
+                    .set_offs(mem_base)
+                    .write(bar, &E::ID, port as usize);
+            },
+            FalconMem::Dmem => {
+                // gm200_flcn_pio_dmem_wr_init
+                regs::NV_PFALCON_FALCON_DMEMC::default()
+                    .set_aincw(true)
+                    .set_offs(mem_base)
+                    .write(bar, &E::ID, port as usize);
+            },
+        }
+
+        while remaining_len > 0 {
+            let xfer_len = core::cmp::min(remaining_len, 256 / 4); // pio->max 
= 256
+
+            // Perform the PIO write for the next 256 bytes.  Each tag 
represents
+            // a 256-byte block in IMEM/DMEM.
+            let mut len = xfer_len;
+
+            match target_mem {
+                FalconMem::ImemSec | FalconMem::ImemNs => {
+                    regs::NV_PFALCON_FALCON_IMEMT::default()
+                        .set_tag(tag)
+                        .write(bar, &E::ID, port as usize);
+
+                    while len > 0 {
+                        regs::NV_PFALCON_FALCON_IMEMD::default()
+                            .set_data(img[img_offset])
+                            .write(bar, &E::ID, port as usize);
+                        img_offset += 1;
+                        len -= 1;
+                    };
+
+                    tag += 1;
+                },
+                FalconMem::Dmem => {
+                    // tag is ignored for DMEM
+                    while len > 0 {
+                        regs::NV_PFALCON_FALCON_DMEMD::default()
+                            .set_data(img[img_offset])
+                            .write(bar, &E::ID, port as usize);
+                        img_offset += 1;
+                        len -= 1;
+                    };
+                },
+            }
+
+            remaining_len -= xfer_len;
+        }
+
+        Ok(())
+    }
+
+    /// See nvkm_falcon_pio_wr
+    fn pio_wr<F: FalconFirmware<Target = E>>(
+        &self,
+        bar: &Bar0,
+        fw: &F,
+        target_mem: FalconMem,
+        load_offsets: &FalconLoadTarget,
+        port: u8,
+        tag: u16,
+    ) -> Result {
+        // FIXME: There's probably a better way to create a pointer to inside 
the firmware
+        // Maybe CoherentAllocation needs to implement a method for that.
+        let start = unsafe { fw.start_ptr().add(load_offsets.src_start as 
usize) };
+        self.pio_wr_bytes(bar, start,
+            load_offsets.dst_start as u16,
+            load_offsets.len as usize, target_mem, port, tag)
+    }
+
+    /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the 
falcon to run it.
+    pub(crate) fn pio_load<F: FalconFirmware<Target = E>>(
+        &self,
+        bar: &Bar0,
+        fw: &F,
+        gbl: Option<&GenericBootloader>
+    ) -> Result {
+        let imem_sec = fw.imem_sec_load_params();
+        let imem_ns = fw.imem_ns_load_params().unwrap();
+        let dmem = fw.dmem_load_params();
+
+        regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID)
+            .set_allow_phys_no_ctx(true)
+            .write(bar, &E::ID);
+
+        regs::NV_PFALCON_FALCON_DMACTL::default()
+            .write(bar, &E::ID);
+
+        // If the Generic Bootloader was passed, then use it to boot FRTS
+        if let Some(gbl) =  gbl {
+            let load_params = FalconLoadTarget {
+                src_start: 0,
+                dst_start: 0x10000 - gbl.desc.code_size,
+                len: gbl.desc.code_size,
+            };
+            self.pio_wr_bytes(bar, gbl.ucode.as_ptr(),
+                load_params.dst_start as u16, load_params.len as usize,
+                FalconMem::ImemNs, 0, gbl.desc.start_tag as u16)?;
+
+            // This structure tells the generic bootloader where to find the 
FWSEC
+            // image.
+            let dmem_desc = BootloaderDmemDescV2 {
+                reserved: [0; 4],
+                signature: [0; 4],
+                ctx_dma: 4, // FALCON_DMAIDX_PHYS_SYS_NCOH
+                code_dma_base: fw.dma_handle(),
+                non_sec_code_off: imem_ns.dst_start,
+                non_sec_code_size: imem_ns.len,
+                sec_code_off: imem_sec.dst_start,
+                sec_code_size: imem_sec.len,
+                code_entry_point: 0,
+                data_dma_base: fw.dma_handle() + dmem.src_start as u64,
+                data_size: dmem.len,
+                argc: 0,
+                argv: 0,
+            };
+
+            regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 4, |v| {
+                v.set_target(FalconFbifTarget::CoherentSysmem)
+                    .set_mem_type(FalconFbifMemType::Physical)
+            });
+
+            self.pio_wr_bytes(bar, &dmem_desc as *const _ as *const u8, 0,
+                core::mem::size_of::<BootloaderDmemDescV2>(),
+                FalconMem::Dmem, 0, 0)?;
+        } else {
+            self.pio_wr(bar, fw, FalconMem::ImemNs, &imem_ns, 0,
+                u16::try_from(imem_ns.dst_start >> 8)?)?;
+            self.pio_wr(bar, fw, FalconMem::ImemSec, &imem_sec, 0,
+                u16::try_from(imem_sec.dst_start >> 8)?)?;
+            self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0, 0)?;
+        }
+
+        self.hal.program_brom(self, bar, &fw.brom_params())?;
+
+        // Set `BootVec` to start of non-secure code.
+        regs::NV_PFALCON_FALCON_BOOTVEC::default()
+            .set_value(fw.boot_addr())
+            .write(bar, &E::ID);
+
+        Ok(())
+    }
+
     /// Perform a DMA write according to `load_offsets` from `dma_handle` into 
the falcon's
     /// `target_mem`.
     ///
diff --git a/drivers/gpu/nova-core/firmware.rs 
b/drivers/gpu/nova-core/firmware.rs
index 5ca5bf1fb610..ecab16af0166 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -31,7 +31,7 @@
 pub(crate) const FIRMWARE_VERSION: &str = "570.144";
 
 /// Requests the GPU firmware `name` suitable for `chipset`, with version 
`ver`.
-fn request_firmware(
+pub(crate) fn request_firmware(
     dev: &device::Device,
     chipset: gpu::Chipset,
     name: &str,
@@ -252,7 +252,7 @@ fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> 
{
 /// Header common to most firmware files.
 #[repr(C)]
 #[derive(Debug, Clone)]
-struct BinHdr {
+pub(crate) struct BinHdr {
     /// Magic number, must be `0x10de`.
     bin_magic: u32,
     /// Version of the header.
diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs 
b/drivers/gpu/nova-core/firmware/fwsec.rs
index 36ff8ed51c23..39fe23dab4bf 100644
--- a/drivers/gpu/nova-core/firmware/fwsec.rs
+++ b/drivers/gpu/nova-core/firmware/fwsec.rs
@@ -40,12 +40,15 @@
         FalconLoadTarget, //
     },
     firmware::{
+        FIRMWARE_VERSION,
+        BinHdr,
         FalconUCodeDesc,
         FirmwareDmaObject,
         FirmwareSignature,
         Signed,
         Unsigned, //
     },
+    gpu::Chipset,
     num::{
         FromSafeCast,
         IntoSafeCast, //
@@ -213,6 +216,44 @@ unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>(
     T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? 
}).ok_or(EINVAL)
 }
 
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub(crate) struct BootloaderDesc {
+       pub start_tag: u32,
+       pub dmem_load_off: u32,
+       pub code_off: u32,
+       pub code_size: u32,
+       pub data_off: u32,
+       pub data_size: u32,
+}
+// SAFETY: any byte sequence is valid for this struct.
+unsafe impl FromBytes for BootloaderDesc {}
+
+#[repr(C, packed)]
+#[derive(Debug, Clone)]
+pub(crate) struct BootloaderDmemDescV2 {
+       pub reserved: [u32; 4],
+       pub signature: [u32; 4],
+       pub ctx_dma: u32,
+       pub code_dma_base: u64,
+       pub non_sec_code_off: u32,
+       pub non_sec_code_size: u32,
+       pub sec_code_off: u32,
+       pub sec_code_size: u32,
+       pub code_entry_point: u32,
+       pub data_dma_base: u64,
+       pub data_size: u32,
+       pub argc: u32,
+       pub argv: u32,
+}
+// SAFETY: any byte sequence is valid for this struct.
+unsafe impl kernel::transmute::FromBytes for BootloaderDmemDescV2 {}
+
+pub(crate) struct GenericBootloader {
+    pub desc: BootloaderDesc,
+    pub ucode: Vec<u8, kernel::alloc::allocator::Kmalloc>,
+}
+
 /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP 
falcon.
 ///
 /// It is responsible for e.g. carving out the WPR2 region as the first step 
of the GSP bootflow.
@@ -221,6 +262,8 @@ pub(crate) struct FwsecFirmware {
     desc: FalconUCodeDesc,
     /// GPU-accessible DMA object containing the firmware.
     ucode: FirmwareDmaObject<Self, Signed>,
+    /// Generic bootloader
+    gen_bootloader: Option<GenericBootloader>,
 }
 
 impl FalconLoadParams for FwsecFirmware {
@@ -275,7 +318,19 @@ fn brom_params(&self) -> FalconBromParams {
     }
 
     fn boot_addr(&self) -> u32 {
-        0
+        match &self.desc {
+            FalconUCodeDesc::V2(_v2) => {
+                // On V2 platforms, the boot address is extracted from the
+                // generic bootloader, because the gbl is what actually copies
+                // FWSEC into memory, so that is what needs to be booted.
+                if let Some(ref gbl) = self.gen_bootloader {
+                    gbl.desc.start_tag << 8
+                } else {
+                    0
+                }
+            },
+            FalconUCodeDesc::V3(_v3) => 0,
+        }
     }
 }
 
@@ -376,6 +431,7 @@ impl FwsecFirmware {
     /// command.
     pub(crate) fn new(
         dev: &Device<device::Bound>,
+        chipset: Chipset,
         falcon: &Falcon<Gsp>,
         bar: &Bar0,
         bios: &Vbios,
@@ -432,9 +488,49 @@ pub(crate) fn new(
             ucode_dma.no_patch_signature()
         };
 
+        // The Generic Bootloader exists only on Turing and GA100.  To avoid a 
bogus
+        // console error message on other platforms, only try to load it if 
it's
+        // supposed to be there.
+        let gbl_fw = if chipset < Chipset::GA102 {
+            super::request_firmware(dev, chipset, "gen_bootloader", 
FIRMWARE_VERSION)
+        } else {
+            Err(ENOENT)
+        };
+
+        let gbl = match gbl_fw {
+            Ok(fw) => {
+                let hdr = fw.data()
+                    .get(0..size_of::<BinHdr>())
+                    .and_then(BinHdr::from_bytes_copy)
+                    .ok_or(EINVAL)?;
+
+                let desc_offset = hdr.header_offset as usize;
+                let desc = fw.data()
+                    .get(desc_offset..desc_offset + 
size_of::<BootloaderDesc>())
+                    .and_then(BootloaderDesc::from_bytes_copy)
+                    .ok_or(EINVAL)?;
+
+                let ucode_start = hdr.data_offset as usize;
+                let ucode_size = hdr.data_size as usize;
+                let ucode_data = fw.data()
+                    .get(ucode_start..ucode_start + ucode_size)
+                    .ok_or(EINVAL)?;
+
+                let mut ucode = KVec::new();
+                ucode.extend_from_slice(ucode_data, GFP_KERNEL)?;
+
+                Some(GenericBootloader {
+                    desc: desc,
+                    ucode: ucode,
+                })
+            },
+            Err(_) => None,
+        };
+
         Ok(FwsecFirmware {
             desc: desc,
             ucode: ucode_signed,
+            gen_bootloader: gbl,
         })
     }
 
@@ -449,9 +545,17 @@ pub(crate) fn run(
         falcon
             .reset(bar)
             .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: 
{:?}\n", e))?;
-        falcon
-            .dma_load(bar, self)
-            .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: 
{:?}\n", e))?;
+
+        // If the Generic Bootloader was found, then upload it via PIO , 
otherwise
+        if let Some(ref gbl) = self.gen_bootloader {
+            falcon
+                .pio_load(bar, self, Some(gbl))
+                .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: 
{:?}\n", e))?;
+        } else {
+            falcon
+                .dma_load(bar, self)
+                .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: 
{:?}\n", e))?;
+        }
         let (mbox0, _) = falcon
             .boot(bar, Some(0), None)
             .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: 
{:?}\n", e))?;
diff --git a/drivers/gpu/nova-core/gsp/boot.rs 
b/drivers/gpu/nova-core/gsp/boot.rs
index eb0ee4f66f0c..ff53d58c1df6 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -44,6 +44,7 @@ impl super::Gsp {
     /// created the WPR2 region.
     fn run_fwsec_frts(
         dev: &device::Device<device::Bound>,
+        chipset: Chipset,
         falcon: &Falcon<Gsp>,
         bar: &Bar0,
         bios: &Vbios,
@@ -61,6 +62,7 @@ fn run_fwsec_frts(
 
         let fwsec_frts = FwsecFirmware::new(
             dev,
+            chipset,
             falcon,
             bar,
             bios,
@@ -143,7 +145,7 @@ pub(crate) fn boot(
         let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
         dev_dbg!(dev, "{:#x?}\n", fb_layout);
 
-        Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
+        Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, 
&fb_layout)?;
 
         let booter_loader = BooterFirmware::new(
             dev,
@@ -182,7 +184,11 @@ pub(crate) fn boot(
         );
 
         sec2_falcon.reset(bar)?;
-        sec2_falcon.dma_load(bar, &booter_loader)?;
+        if chipset > Chipset::GA100 {
+            sec2_falcon.dma_load(bar, &booter_loader)?;
+        } else {
+            sec2_falcon.pio_load(bar, &booter_loader, None)?;
+        }
         let wpr_handle = wpr_meta.dma_handle();
         let (mbox0, mbox1) = sec2_falcon.boot(
             bar,
-- 
2.51.2

Reply via email to