Turing and GA100 use programmed I/O (PIO) instead of DMA to upload firmware images into Falcon memory.
A new firmware called the Generic Bootloader (as opposed to the GSP Bootloader) is used to upload FWSEC. Signed-off-by: Timur Tabi <[email protected]> --- drivers/gpu/nova-core/falcon.rs | 181 ++++++++++++++++++++++++ drivers/gpu/nova-core/firmware.rs | 4 +- drivers/gpu/nova-core/firmware/fwsec.rs | 112 ++++++++++++++- drivers/gpu/nova-core/gsp/boot.rs | 10 +- 4 files changed, 299 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 7af32f65ba5f..f9a4a35b7569 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -20,6 +20,10 @@ use crate::{ dma::DmaObject, driver::Bar0, + firmware::fwsec::{ + BootloaderDmemDescV2, + GenericBootloader, // + }, gpu::Chipset, num::{ FromSafeCast, @@ -400,6 +404,183 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result { Ok(()) } + + /// See nvkm_falcon_pio_wr - takes a byte array instead of a FalconFirmware + fn pio_wr_bytes( + &self, + bar: &Bar0, + source: *const u8, + mem_base: u16, + length: usize, + target_mem: FalconMem, + port: u8, + tag: u16 + ) -> Result { + // To avoid unnecessary complication in the write loop, make sure the buffer + // length is aligned. It always is, which is why an assertion is okay. 
+ assert!((length % 4) == 0); + + // From now on, we treat the data as an array of u32 + + let length = length / 4; + let mut remaining_len: usize = length; + let mut img_offset: usize = 0; + let mut tag = tag; + + // Get data as a slice of u32s + let img = unsafe { + core::slice::from_raw_parts(source as *const u32, length) + }; + + match target_mem { + FalconMem::ImemSec | FalconMem::ImemNs => { + regs::NV_PFALCON_FALCON_IMEMC::default() + .set_secure(target_mem == FalconMem::ImemSec) + .set_aincw(true) + .set_offs(mem_base) + .write(bar, &E::ID, port as usize); + }, + FalconMem::Dmem => { + // gm200_flcn_pio_dmem_wr_init + regs::NV_PFALCON_FALCON_DMEMC::default() + .set_aincw(true) + .set_offs(mem_base) + .write(bar, &E::ID, port as usize); + }, + } + + while remaining_len > 0 { + let xfer_len = core::cmp::min(remaining_len, 256 / 4); // pio->max = 256 + + // Perform the PIO write for the next 256 bytes. Each tag represents + // a 256-byte block in IMEM/DMEM. + let mut len = xfer_len; + + match target_mem { + FalconMem::ImemSec | FalconMem::ImemNs => { + regs::NV_PFALCON_FALCON_IMEMT::default() + .set_tag(tag) + .write(bar, &E::ID, port as usize); + + while len > 0 { + regs::NV_PFALCON_FALCON_IMEMD::default() + .set_data(img[img_offset]) + .write(bar, &E::ID, port as usize); + img_offset += 1; + len -= 1; + }; + + tag += 1; + }, + FalconMem::Dmem => { + // tag is ignored for DMEM + while len > 0 { + regs::NV_PFALCON_FALCON_DMEMD::default() + .set_data(img[img_offset]) + .write(bar, &E::ID, port as usize); + img_offset += 1; + len -= 1; + }; + }, + } + + remaining_len -= xfer_len; + } + + Ok(()) + } + + /// See nvkm_falcon_pio_wr + fn pio_wr<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + target_mem: FalconMem, + load_offsets: &FalconLoadTarget, + port: u8, + tag: u16, + ) -> Result { + // FIXME: There's probably a better way to create a pointer into the firmware image. + // Maybe CoherentAllocation needs to implement a method for that. 
+ let start = unsafe { fw.start_ptr().add(load_offsets.src_start as usize) }; + self.pio_wr_bytes(bar, start, + load_offsets.dst_start as u16, + load_offsets.len as usize, target_mem, port, tag) + } + + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. + pub(crate) fn pio_load<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + gbl: Option<&GenericBootloader> + ) -> Result { + let imem_sec = fw.imem_sec_load_params(); + let imem_ns = fw.imem_ns_load_params().unwrap(); + let dmem = fw.dmem_load_params(); + + regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID) + .set_allow_phys_no_ctx(true) + .write(bar, &E::ID); + + regs::NV_PFALCON_FALCON_DMACTL::default() + .write(bar, &E::ID); + + // If the Generic Bootloader was passed, then use it to boot FRTS + if let Some(gbl) = gbl { + let load_params = FalconLoadTarget { + src_start: 0, + dst_start: 0x10000 - gbl.desc.code_size, + len: gbl.desc.code_size, + }; + self.pio_wr_bytes(bar, gbl.ucode.as_ptr(), + load_params.dst_start as u16, load_params.len as usize, + FalconMem::ImemNs, 0, gbl.desc.start_tag as u16)?; + + // This structure tells the generic bootloader where to find the FWSEC + // image. 
+ let dmem_desc = BootloaderDmemDescV2 { + reserved: [0; 4], + signature: [0; 4], + ctx_dma: 4, // FALCON_DMAIDX_PHYS_SYS_NCOH + code_dma_base: fw.dma_handle(), + non_sec_code_off: imem_ns.dst_start, + non_sec_code_size: imem_ns.len, + sec_code_off: imem_sec.dst_start, + sec_code_size: imem_sec.len, + code_entry_point: 0, + data_dma_base: fw.dma_handle() + dmem.src_start as u64, + data_size: dmem.len, + argc: 0, + argv: 0, + }; + + regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 4, |v| { + v.set_target(FalconFbifTarget::CoherentSysmem) + .set_mem_type(FalconFbifMemType::Physical) + }); + + self.pio_wr_bytes(bar, &dmem_desc as *const _ as *const u8, 0, + core::mem::size_of::<BootloaderDmemDescV2>(), + FalconMem::Dmem, 0, 0)?; + } else { + self.pio_wr(bar, fw, FalconMem::ImemNs, &imem_ns, 0, + u16::try_from(imem_ns.dst_start >> 8)?)?; + self.pio_wr(bar, fw, FalconMem::ImemSec, &imem_sec, 0, + u16::try_from(imem_sec.dst_start >> 8)?)?; + self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0, 0)?; + } + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + // Set `BootVec` to start of non-secure code. + regs::NV_PFALCON_FALCON_BOOTVEC::default() + .set_value(fw.boot_addr()) + .write(bar, &E::ID); + + Ok(()) + } + /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's /// `target_mem`. /// diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 5ca5bf1fb610..ecab16af0166 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -31,7 +31,7 @@ pub(crate) const FIRMWARE_VERSION: &str = "570.144"; /// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. -fn request_firmware( +pub(crate) fn request_firmware( dev: &device::Device, chipset: gpu::Chipset, name: &str, @@ -252,7 +252,7 @@ fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> { /// Header common to most firmware files. 
#[repr(C)] #[derive(Debug, Clone)] -struct BinHdr { +pub(crate) struct BinHdr { /// Magic number, must be `0x10de`. bin_magic: u32, /// Version of the header. diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 36ff8ed51c23..39fe23dab4bf 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -40,12 +40,15 @@ FalconLoadTarget, // }, firmware::{ + FIRMWARE_VERSION, + BinHdr, FalconUCodeDesc, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned, // }, + gpu::Chipset, num::{ FromSafeCast, IntoSafeCast, // @@ -213,6 +216,44 @@ unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>( T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? }).ok_or(EINVAL) } +#[repr(C)] +#[derive(Debug, Clone)] +pub(crate) struct BootloaderDesc { + pub start_tag: u32, + pub dmem_load_off: u32, + pub code_off: u32, + pub code_size: u32, + pub data_off: u32, + pub data_size: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for BootloaderDesc {} + +#[repr(C, packed)] +#[derive(Debug, Clone)] +pub(crate) struct BootloaderDmemDescV2 { + pub reserved: [u32; 4], + pub signature: [u32; 4], + pub ctx_dma: u32, + pub code_dma_base: u64, + pub non_sec_code_off: u32, + pub non_sec_code_size: u32, + pub sec_code_off: u32, + pub sec_code_size: u32, + pub code_entry_point: u32, + pub data_dma_base: u64, + pub data_size: u32, + pub argc: u32, + pub argv: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl kernel::transmute::FromBytes for BootloaderDmemDescV2 {} + +pub(crate) struct GenericBootloader { + pub desc: BootloaderDesc, + pub ucode: Vec<u8, kernel::alloc::allocator::Kmalloc>, +} + /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. /// /// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. 
@@ -221,6 +262,8 @@ pub(crate) struct FwsecFirmware { desc: FalconUCodeDesc, /// GPU-accessible DMA object containing the firmware. ucode: FirmwareDmaObject<Self, Signed>, + /// Generic bootloader + gen_bootloader: Option<GenericBootloader>, } impl FalconLoadParams for FwsecFirmware { @@ -275,7 +318,19 @@ fn brom_params(&self) -> FalconBromParams { } fn boot_addr(&self) -> u32 { - 0 + match &self.desc { + FalconUCodeDesc::V2(_v2) => { + // On V2 platforms, the boot address is extracted from the + // generic bootloader, because the gbl is what actually copies + // FWSEC into memory, so that is what needs to be booted. + if let Some(ref gbl) = self.gen_bootloader { + gbl.desc.start_tag << 8 + } else { + 0 + } + }, + FalconUCodeDesc::V3(_v3) => 0, + } } } @@ -376,6 +431,7 @@ impl FwsecFirmware { /// command. pub(crate) fn new( dev: &Device<device::Bound>, + chipset: Chipset, falcon: &Falcon<Gsp>, bar: &Bar0, bios: &Vbios, @@ -432,9 +488,49 @@ pub(crate) fn new( ucode_dma.no_patch_signature() }; + // The Generic Bootloader exists only on Turing and GA100. To avoid a bogus + // console error message on other platforms, only try to load it if it's + // supposed to be there. 
+ let gbl_fw = if chipset < Chipset::GA102 { + super::request_firmware(dev, chipset, "gen_bootloader", FIRMWARE_VERSION) + } else { + Err(ENOENT) + }; + + let gbl = match gbl_fw { + Ok(fw) => { + let hdr = fw.data() + .get(0..size_of::<BinHdr>()) + .and_then(BinHdr::from_bytes_copy) + .ok_or(EINVAL)?; + + let desc_offset = hdr.header_offset as usize; + let desc = fw.data() + .get(desc_offset..desc_offset + size_of::<BootloaderDesc>()) + .and_then(BootloaderDesc::from_bytes_copy) + .ok_or(EINVAL)?; + + let ucode_start = hdr.data_offset as usize; + let ucode_size = hdr.data_size as usize; + let ucode_data = fw.data() + .get(ucode_start..ucode_start + ucode_size) + .ok_or(EINVAL)?; + + let mut ucode = KVec::new(); + ucode.extend_from_slice(ucode_data, GFP_KERNEL)?; + + Some(GenericBootloader { + desc: desc, + ucode: ucode, + }) + }, + Err(_) => None, + }; + Ok(FwsecFirmware { desc: desc, ucode: ucode_signed, + gen_bootloader: gbl, }) } @@ -449,9 +545,17 @@ pub(crate) fn run( falcon .reset(bar) .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; - falcon - .dma_load(bar, self) - .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + + // If the Generic Bootloader was found, then upload it via PIO; otherwise, fall back to a DMA load. + if let Some(ref gbl) = self.gen_bootloader { + falcon + .pio_load(bar, self, Some(gbl)) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + } else { + falcon + .dma_load(bar, self) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + } let (mbox0, _) = falcon .boot(bar, Some(0), None) .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?; diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs index eb0ee4f66f0c..ff53d58c1df6 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -44,6 +44,7 @@ impl super::Gsp { /// created the WPR2 region. 
fn run_fwsec_frts( dev: &device::Device<device::Bound>, + chipset: Chipset, falcon: &Falcon<Gsp>, bar: &Bar0, bios: &Vbios, @@ -61,6 +62,7 @@ fn run_fwsec_frts( let fwsec_frts = FwsecFirmware::new( dev, + chipset, falcon, bar, bios, @@ -143,7 +145,7 @@ pub(crate) fn boot( let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?; dev_dbg!(dev, "{:#x?}\n", fb_layout); - Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb_layout)?; let booter_loader = BooterFirmware::new( dev, @@ -182,7 +184,11 @@ pub(crate) fn boot( ); sec2_falcon.reset(bar)?; - sec2_falcon.dma_load(bar, &booter_loader)?; + if chipset > Chipset::GA100 { + sec2_falcon.dma_load(bar, &booter_loader)?; + } else { + sec2_falcon.pio_load(bar, &booter_loader, None)?; + } let wpr_handle = wpr_meta.dma_handle(); let (mbox0, mbox1) = sec2_falcon.boot( bar, -- 2.51.2
