On Wed, 11 Feb 2026 17:37:10 -0800
Deborah Brouwer <[email protected]> wrote:

> From: Boris Brezillon <[email protected]>
> 
> Add GPU virtual address space management using the DRM GPUVM framework.
> Each virtual memory (VM) space is backed by ARM64 LPAE Stage 1 page tables
> and can be mapped into hardware address space (AS) slots for GPU execution.
> 
> The implementation provides memory isolation and virtual address
> allocation. VMs support mapping GEM buffer objects with configurable
> protection flags (readonly, noexec, uncached) and handle both 4KB and 2MB
> page sizes.
> 
> The vm module integrates with the MMU for address space activation and
> provides map/unmap/remap operations with page table synchronization.
> 
> Signed-off-by: Boris Brezillon <[email protected]>
> Co-developed-by: Daniel Almeida <[email protected]>
> Signed-off-by: Daniel Almeida <[email protected]>
> Co-developed-by: Deborah Brouwer <[email protected]>
> Signed-off-by: Deborah Brouwer <[email protected]>
> ---
>  drivers/gpu/drm/tyr/gem.rs |   1 -
>  drivers/gpu/drm/tyr/gpu.rs |   1 -
>  drivers/gpu/drm/tyr/tyr.rs |   1 +
>  drivers/gpu/drm/tyr/vm.rs  | 783 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 784 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/tyr/vm.rs
> 
> diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs
> index 6a58f2da88d3..111acf33993f 100644
> --- a/drivers/gpu/drm/tyr/gem.rs
> +++ b/drivers/gpu/drm/tyr/gem.rs
> @@ -48,7 +48,6 @@ fn new<Ctx: DeviceContext>(
>  pub(crate) type Bo = gem::shmem::Object<BoData>;
>  
>  /// Creates a dummy GEM object to serve as the root of a GPUVM.
> -#[expect(dead_code)]
>  pub(crate) fn new_dummy_object<Ctx: DeviceContext>(ddev: &TyrDrmDevice<Ctx>) -> Result<ARef<Bo>> {
>      let bo = gem::shmem::Object::<BoData>::new(
>          ddev,
> diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs
> index b5f11bc96fa0..f5e7086ff73c 100644
> --- a/drivers/gpu/drm/tyr/gpu.rs
> +++ b/drivers/gpu/drm/tyr/gpu.rs
> @@ -135,7 +135,6 @@ pub(crate) fn log(&self, pdev: &platform::Device) {
>      }
>  
>      /// Returns the number of virtual address bits supported by the GPU.
> -    #[expect(dead_code)]
>      pub(crate) fn va_bits(&self) -> u32 {
>          self.mmu_features & genmask_u32(0..=7)
>      }
> diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs
> index ae435c7e80b1..8e73db3a080a 100644
> --- a/drivers/gpu/drm/tyr/tyr.rs
> +++ b/drivers/gpu/drm/tyr/tyr.rs
> @@ -14,6 +14,7 @@
>  mod mmu;
>  mod regs;
>  mod slot;
> +mod vm;
>  
>  kernel::module_platform_driver! {
>      type: TyrPlatformDeviceData,
> diff --git a/drivers/gpu/drm/tyr/vm.rs b/drivers/gpu/drm/tyr/vm.rs
> new file mode 100644
> index 000000000000..806bc4e587d6
> --- /dev/null
> +++ b/drivers/gpu/drm/tyr/vm.rs
> @@ -0,0 +1,783 @@
> +// SPDX-License-Identifier: GPL-2.0 or MIT
> +
> +//! GPU virtual memory management using the DRM GPUVM framework.
> +//!
> +//! This module manages GPU virtual address spaces, providing memory isolation and
> +//! the illusion of owning the entire VA range, similar to CPU virtual memory. Each
> +//! VM is backed by ARM64 LPAE Stage 1 page tables and can be mapped into hardware
> +//! address space (AS) slots for GPU execution.
> +#![allow(dead_code)]
> +
> +use core::ops::Range;
> +
> +use kernel::{
> +    alloc::KBox,
> +    c_str,
> +    drm::{
> +        gpuvm::{
> +            DriverGpuVm,
> +            GpuVaAlloc,
> +            GpuVm,
> +            GpuVmBoRegistered,
> +            GpuVmCore,
> +            OpMap,
> +            OpMapRequest,
> +            OpMapped,
> +            OpRemap,
> +            OpRemapped,
> +            OpUnmap,
> +            OpUnmapped, //
> +        },
> +        DeviceContext, //
> +    },
> +    impl_flags,
> +    iommu::pgtable::{
> +        prot,
> +        Config,
> +        IoPageTable,
> +        ARM64LPAES1, //
> +    },
> +    new_mutex,
> +    platform,
> +    prelude::*,
> +    sizes::{
> +        SZ_1G,
> +        SZ_2M,
> +        SZ_4K, //
> +    },
> +    sync::{
> +        aref::ARef,
> +        Arc,
> +        ArcBorrow,
> +        Mutex, //
> +    },
> +    uapi, //
> +};
> +
> +use crate::{
> +    driver::{
> +        TyrDrmDevice,
> +        TyrDrmDriver, //
> +    },
> +    gem,
> +    gem::Bo,
> +    gpu::GpuInfo,
> +    mmu::{
> +        address_space::*,
> +        Mmu, //
> +    },
> +    regs::*, //
> +};
> +
> +impl_flags!(
> +    #[derive(Debug, Clone, Default, Copy, PartialEq, Eq)]
> +    pub(crate) struct VmMapFlags(u32);

Missing docs. I'll stop pointing those out, and let you check any
undocumented pub struct/enum/field/function in v2. I'm here to help if
you're struggling to find a good description.
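
For instance, something along these lines for the flags type would already
help (wording entirely up to you, this is just a sketch):

    /// Flags controlling how a GEM object range is mapped into a [`Vm`].
    ///
    /// These mirror the `DRM_PANTHOR_VM_BIND_OP_MAP_*` uAPI flags.
    #[derive(Debug, Clone, Default, Copy, PartialEq, Eq)]
    pub(crate) struct VmMapFlags(u32);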

> +
> +    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
> +    pub(crate) enum VmFlag {
> +        Readonly = uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_READONLY as u32,
> +        Noexec = uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC as u32,
> +        Uncached = uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED as u32,
> +    }
> +);
> +
> +impl VmMapFlags {
> +    /// Convert the flags to `pgtable::prot`.
> +    fn to_prot(self) -> u32 {
> +        let mut prot = 0;
> +
> +        if self.contains(VmFlag::Readonly) {
> +            prot |= prot::READ;
> +        } else {
> +            prot |= prot::READ | prot::WRITE;
> +        }
> +
> +        if self.contains(VmFlag::Noexec) {
> +            prot |= prot::NOEXEC;
> +        }
> +
> +        if !self.contains(VmFlag::Uncached) {
> +            prot |= prot::CACHE;
> +        }
> +
> +        prot
> +    }
> +}
> +
> +impl core::fmt::Display for VmMapFlags {
> +    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
> +        let mut first = true;
> +
> +        if self.contains(VmFlag::Readonly) {
> +            write!(f, "READONLY")?;
> +            first = false;
> +        }
> +        if self.contains(VmFlag::Noexec) {
> +            if !first {
> +                write!(f, " | ")?;
> +            }
> +            write!(f, "NOEXEC")?;
> +            first = false;
> +        }
> +
> +        if self.contains(VmFlag::Uncached) {
> +            if !first {
> +                write!(f, " | ")?;
> +            }
> +            write!(f, "UNCACHED")?;
> +        }
> +
> +        Ok(())
> +    }
> +}
> +
> +impl TryFrom<u32> for VmMapFlags {
> +    type Error = Error;
> +
> +    fn try_from(value: u32) -> core::result::Result<Self, Self::Error> {
> +        let valid = (kernel::uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_READONLY
> +            | kernel::uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC
> +            | kernel::uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED)
> +            as u32;
> +
> +        if value & !valid != 0 {
> +            pr_err!("Invalid VM map flags: {:#x}\n", value);
> +            return Err(EINVAL);
> +        }
> +        Ok(Self(value))
> +    }
> +}
> +
> +struct VmMapArgs {
> +    flags: VmMapFlags,
> +    vm_bo: GpuVmBoRegistered<GpuVmData>,
> +    bo_offset: u64,
> +}
> +
> +enum VmOpType {
> +    Map(VmMapArgs),
> +    Unmap,
> +}
> +
> +struct VmOpResources {
> +    /// This handles the remap case.
> +    ///
> +    /// Partial unmap requests or map requests overlapping existing mappings
> +    /// will trigger a remap call, which needs to register up to three VA
> +    /// objects (one for the new mapping, and two for the previous and next
> +    /// mappings).
> +    preallocated_gpuvas: [Option<GpuVaAlloc<GpuVmData>>; 3],
> +}
> +
> +struct VmOpRequest {
> +    /// Request type.
> +    op_type: VmOpType,
> +
> +    /// Region of the virtual address space covered by this request.
> +    region: Range<u64>,
> +}
> +
> +struct PtMapArgs {
> +    /// Flags describing authorized accesses for this mapping.
> +    ///
> +    /// This is directly derived from the VmMapFlags.
> +    prot: u32,
> +}
> +
> +enum PtOpType {
> +    Map(PtMapArgs),
> +    Unmap,
> +}
> +
> +pub(crate) struct PtUpdateContext<'ctx> {
> +    /// Page table.
> +    pt: &'ctx IoPageTable<ARM64LPAES1>,
> +
> +    /// MMU
> +    mmu: &'ctx Mmu,
> +
> +    /// Reference to the AS data to pass to the MMU functions
> +    as_data: &'ctx VmAsData,
> +
> +    /// Region of the virtual address space covered by this request.
> +    region: Range<u64>,
> +
> +    /// Operation type.
> +    op_type: PtOpType,
> +
> +    /// Pre-allocated resources that can be used when executing the request.
> +    resources: &'ctx mut VmOpResources,
> +}
> +
> +impl<'ctx> PtUpdateContext<'ctx> {
> +    fn new(
> +        pt: &'ctx IoPageTable<ARM64LPAES1>,
> +        mmu: &'ctx Mmu,
> +        as_data: &'ctx VmAsData,
> +        region: Range<u64>,
> +        op_type: PtOpType,
> +        resources: &'ctx mut VmOpResources,
> +    ) -> Result<PtUpdateContext<'ctx>> {
> +        mmu.start_vm_update(as_data, &region)?;
> +
> +        Ok(Self {
> +            pt,
> +            mmu,
> +            as_data,
> +            region,
> +            op_type,
> +            resources,
> +        })
> +    }
> +
> +    /// Finds one of our pre-allocated VAs.
> +    ///
> +    /// It is a logic error to call this more than three times for a given
> +    /// PtUpdateContext.
> +    fn preallocated_gpuva(&mut self) -> Result<GpuVaAlloc<GpuVmData>> {
> +        self.resources
> +            .preallocated_gpuvas
> +            .iter_mut()
> +            .find_map(|f| f.take())
> +            .ok_or(EINVAL)
> +    }
> +}
> +
> +impl Drop for PtUpdateContext<'_> {
> +    fn drop(&mut self) {
> +        if let Err(e) = self.mmu.end_vm_update(self.as_data) {
> +            pr_err!("Failed to end VM update {:?}\n", e);
> +        }
> +
> +        if let Err(e) = self.mmu.flush_vm(self.as_data) {
> +            pr_err!("Failed to flush VM {:?}\n", e);
> +        }
> +    }
> +}
> +
> +pub(crate) struct GpuVmData {}
> +
> +/// GPU virtual address space.
> +///
> +/// Each VM can be mapped into a hardware address space slot.
> +#[pin_data]
> +pub(crate) struct Vm {
> +    /// Data referenced by an AS when the VM is active
> +    as_data: Arc<VmAsData>,
> +    /// MMU manager.
> +    mmu: Arc<Mmu>,
> +    /// Platform device reference (needed to access the page table through devres).
> +    pdev: ARef<platform::Device>,
> +    /// DRM GPUVM core for managing virtual address space.
> +    #[pin]
> +    gpuvm_core: Mutex<GpuVmCore<GpuVmData>>,
> +    /// Non-core part of the GPUVM. Can be used for stuff that doesn't modify the
> +    /// internal mapping tree, like GpuVm::obtain().
> +    gpuvm: ARef<GpuVm<GpuVmData>>,
> +    /// VA range for this VM.
> +    va_range: Range<u64>,
> +}
> +
> +impl Vm {
> +    pub(crate) fn new<Ctx: DeviceContext>(
> +        pdev: &platform::Device,
> +        ddev: &TyrDrmDevice<Ctx>,
> +        mmu: ArcBorrow<'_, Mmu>,
> +        gpu_info: &GpuInfo,
> +    ) -> Result<Arc<Vm>> {
> +        let va_bits = gpu_info.va_bits();
> +        let pa_bits = gpu_info.pa_bits();
> +
> +        let pt_config = Config {
> +            quirks: 0,
> +            pgsize_bitmap: SZ_4K | SZ_2M,
> +            ias: va_bits,
> +            oas: pa_bits,
> +            coherent_walk: false,
> +        };
> +
> +        // SAFETY: pdev is a bound device.
> +        let dev = unsafe { pdev.as_ref().as_bound() };
> +        let page_table_init = IoPageTable::new(dev, pt_config);
> +        let page_table = KBox::pin_init(page_table_init, GFP_KERNEL).inspect_err(|e| {
> +            pr_err!("Failed to initialize page table: {:?}\n", e);
> +        })?;
> +        let pt = page_table.access(dev).inspect_err(|e| {
> +            pr_err!("Failed to access page table: {:?}\n", e);
> +        })?;
> +
> +        let as_config = AddressSpaceConfig {
> +            transcfg: AS_TRANSCFG_PTW_MEMATTR_WB
> +                | AS_TRANSCFG_PTW_RA
> +                | AS_TRANSCFG_ADRMODE_AARCH64_4K
> +                | as_transcfg_ina_bits(u64::from(55 - va_bits)),
> +            // SAFETY: Vm::drop() evicts the address space and performs deferred
> +            // cleanup before dropping the page_table Arc. This ensures that
> +            // the device stops using the page table before it is dropped.
> +            transtab: unsafe { pt.ttbr() },
> +            memattr: mair_to_memattr(pt.mair()),
> +        };
> +
> +        let range = 0..(1u64 << va_bits);
> +        let reserve_range = 0..0u64;
> +
> +        let dummy_obj = gem::new_dummy_object(ddev).inspect_err(|e| {
> +            pr_err!("Failed to create dummy GEM object: {:?}\n", e);
> +        })?;
> +
> +        let gpuvm_core = kernel::drm::gpuvm::GpuVm::new::<Error, _>(
> +            c_str!("Tyr::GpuVm"),
> +            ddev,
> +            &*dummy_obj,
> +            range.clone(),
> +            reserve_range,
> +            GpuVmData {},
> +        )
> +        .inspect_err(|e| {
> +            pr_err!("Failed to create GpuVm: {:?}\n", e);
> +        })?;
> +        let gpuvm = ARef::from(&*gpuvm_core);
> +
> +        let as_data = Arc::new(VmAsData::new(&mmu, as_config, page_table), GFP_KERNEL)?;
> +
> +        let vm = Arc::pin_init(
> +            pin_init!(Self{
> +                as_data: as_data,
> +                pdev: pdev.into(),
> +                mmu: mmu.into(),
> +                gpuvm: gpuvm,
> +                gpuvm_core <- new_mutex!(gpuvm_core),
> +                va_range: range,
> +            }),
> +            GFP_KERNEL,
> +        )?;
> +
> +        Ok(vm)
> +    }
> +
> +    /// Activate the VM in a hardware address space slot.
> +    pub(crate) fn activate(&self) -> Result {
> +        self.mmu
> +            .activate_vm(self.as_data.as_arc_borrow())
> +            .inspect_err(|e| {
> +                pr_err!("Failed to activate VM: {:?}\n", e);
> +            })
> +    }
> +
> +    /// Deactivate the VM by evicting it from its address space slot.
> +    fn deactivate(&self) -> Result {
> +        self.mmu.deactivate_vm(&self.as_data).inspect_err(|e| {
> +            pr_err!("Failed to deactivate VM: {:?}\n", e);
> +        })
> +    }
> +
> +    pub(crate) fn kill(&self) {
> +        // TODO: Turn the VM into a state where it can't be used.

Should we address this TODO before it gets merged? I also believe this
should be done under some lock (the gpuvm lock?) to prevent concurrent
map/unmap operations.
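
Roughly what I had in mind (untested sketch; `unmap_range_locked()` is a
hypothetical helper carrying the body of the current unmap_range() without
re-taking the lock, and the `killed` flag doesn't exist yet):

    pub(crate) fn kill(&self) {
        // Hold the gpuvm lock across the whole teardown so concurrent
        // map/unmap calls can't race with it.
        let mut gpuvm_core = self.gpuvm_core.lock();

        // TODO: also set a `killed` flag here, checked by map_bo_range()
        // and unmap_range(), so the VM can't be used afterwards.

        let _ = self.deactivate().inspect_err(|e| {
            pr_err!("Failed to deactivate VM: {:?}\n", e);
        });

        let _ = self
            .unmap_range_locked(
                gpuvm_core.as_mut().get_mut(),
                self.va_range.start,
                self.va_range.end - self.va_range.start,
            )
            .inspect_err(|e| {
                pr_err!("Failed to unmap range while killing the VM: {:?}\n", e);
            });
    }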

> +        let _ = self.deactivate().inspect_err(|e| {
> +            pr_err!("Failed to deactivate VM: {:?}\n", e);
> +        });
> +        let _ = self
> +            .unmap_range(self.va_range.start, self.va_range.end - self.va_range.start)
> +            .inspect_err(|e| {
> +                pr_err!("Failed to unmap range during deactivate: {:?}\n", 
> e);
> +            });
> +    }
> +
> +    fn exec_op(
> +        &self,
> +        gpuvm_core: &mut GpuVmCore<GpuVmData>,
> +        req: VmOpRequest,
> +        resources: &mut VmOpResources,
> +    ) -> Result {
> +        let pt = self
> +            .as_data
> +            .page_table
> +            // SAFETY: pdev is a bound device.
> +            .access(unsafe { self.pdev.as_ref().as_bound() })
> +            .inspect_err(|e| {
> +                pr_err!("Failed to access page table while mapping pages: 
> {:?}\n", e);
> +            })?;
> +
> +        match req.op_type {
> +            VmOpType::Map(args) => {
> +                let mut pt_upd = PtUpdateContext::new(
> +                    pt,
> +                    &self.mmu,
> +                    &self.as_data,
> +                    req.region,
> +                    PtOpType::Map(PtMapArgs {
> +                        prot: args.flags.to_prot(),
> +                    }),
> +                    resources,
> +                )?;
> +
> +                gpuvm_core.sm_map(OpMapRequest {
> +                    addr: pt_upd.region.start,
> +                    range: pt_upd.region.end - pt_upd.region.start,
> +                    gem_offset: args.bo_offset,
> +                    vm_bo: args.vm_bo,
> +                    context: &mut pt_upd,
> +                })
> +                // PtUpdateContext drops here, flushing the page table.
> +            }
> +            VmOpType::Unmap => {
> +                let mut pt_upd = PtUpdateContext::new(
> +                    pt,
> +                    &self.mmu,
> +                    &self.as_data,
> +                    req.region,
> +                    PtOpType::Unmap,
> +                    resources,
> +                )?;
> +
> +                gpuvm_core.sm_unmap(
> +                    pt_upd.region.start,
> +                    pt_upd.region.end - pt_upd.region.start,
> +                    &mut pt_upd,
> +                )
> +                // PtUpdateContext drops here, flushing the page table.
> +            }
> +        }
> +    }
> +
> +    /// Map a GEM object range into the VM.
> +    pub(crate) fn map_bo_range(
> +        &self,
> +        bo: &Bo,
> +        bo_offset: u64,
> +        size: u64,
> +        va: u64,
> +        flags: VmMapFlags,
> +    ) -> Result {
> +        let req = VmOpRequest {
> +            op_type: VmOpType::Map(VmMapArgs {
> +                vm_bo: self.gpuvm.obtain(bo, ())?,
> +                flags,
> +                bo_offset,
> +            }),
> +            region: va..(va + size),
> +        };
> +        let mut resources = VmOpResources {
> +            preallocated_gpuvas: [
> +                Some(GpuVaAlloc::<GpuVmData>::new(GFP_KERNEL)?),
> +                Some(GpuVaAlloc::<GpuVmData>::new(GFP_KERNEL)?),
> +                Some(GpuVaAlloc::<GpuVmData>::new(GFP_KERNEL)?),
> +            ],
> +        };
> +        let mut gpuvm_core = self.gpuvm_core.lock();
> +
> +        self.exec_op(gpuvm_core.as_mut().get_mut(), req, &mut resources)?;
> +
> +        // We flush the defer cleanup list now. Things will be different in
> +        // the asynchronous VM_BIND path, where we want the cleanup to
> +        // happen outside the DMA signalling path.
> +        self.gpuvm.deferred_cleanup();
> +        Ok(())
> +    }
> +
> +    pub(crate) fn unmap_range(&self, va: u64, size: u64) -> Result {
> +        let req = VmOpRequest {
> +            op_type: VmOpType::Unmap,
> +            region: va..(va + size),
> +        };
> +        let mut resources = VmOpResources {
> +            preallocated_gpuvas: [
> +                Some(GpuVaAlloc::<GpuVmData>::new(GFP_KERNEL)?),
> +                Some(GpuVaAlloc::<GpuVmData>::new(GFP_KERNEL)?),
> +                None,
> +            ],
> +        };
> +        let mut gpuvm_core = self.gpuvm_core.lock();
> +
> +        self.exec_op(gpuvm_core.as_mut().get_mut(), req, &mut resources)?;
> +
> +        // We flush the defer cleanup list now. Things will be different in
> +        // the asynchronous VM_BIND path, where we want the cleanup to
> +        // happen outside the DMA signalling path.
> +        self.gpuvm.deferred_cleanup();
> +        Ok(())
> +    }
> +}
> +
> +impl DriverGpuVm for GpuVmData {
> +    type Driver = TyrDrmDriver;
> +    type Object = Bo;
> +    type VmBoData = ();
> +    type VaData = ();
> +    type SmContext<'ctx> = PtUpdateContext<'ctx>;
> +
> +    fn sm_step_map<'op>(
> +        &mut self,
> +        op: OpMap<'op, Self>,
> +        context: &mut Self::SmContext<'_>,
> +    ) -> Result<OpMapped<'op, Self>, Error> {
> +        let start_iova = op.addr();
> +        let mut iova = start_iova;
> +        let mut bytes_left_to_map = op.length();
> +        let mut gem_offset = op.gem_offset();
> +        let sgt = op.obj().sg_table().inspect_err(|e| {
> +            pr_err!("Failed to get sg_table: {:?}\n", e);
> +        })?;
> +        let prot = match &context.op_type {
> +            PtOpType::Map(args) => args.prot,
> +            _ => {
> +                return Err(EINVAL);
> +            }
> +        };
> +
> +        for sgt_entry in sgt.iter() {
> +            let mut paddr = sgt_entry.dma_address();
> +            let mut sgt_entry_length: u64 = sgt_entry.dma_len();
> +
> +            if bytes_left_to_map == 0 {
> +                break;
> +            }
> +
> +            if gem_offset > 0 {
> +                // Skip the entire SGT entry if the gem_offset exceeds its length.
> +                let skip = sgt_entry_length.min(gem_offset);
> +                paddr += skip;
> +                sgt_entry_length -= skip;
> +                gem_offset -= skip;
> +            }
> +
> +            if sgt_entry_length == 0 {
> +                continue;
> +            }
> +
> +            if gem_offset != 0 {
> +                pr_err!("Invalid gem_offset {} in page table mapping.\n", 
> gem_offset);
> +                return Err(EINVAL);
> +            }
> +            let len = sgt_entry_length.min(bytes_left_to_map);
> +
> +            let segment_mapped = match pt_map(context.pt, iova, paddr, len, prot) {
> +                Ok(segment_mapped) => segment_mapped,
> +                Err(e) => {
> +                    // Clean up any successful mappings from previous SGT entries.
> +                    let total_mapped = iova - start_iova;
> +                    if total_mapped > 0 {
> +                        pt_unmap(context.pt, start_iova..(start_iova + total_mapped)).ok();
> +                    }
> +                    return Err(e);
> +                }
> +            };
> +
> +            // Since there could be a partial mapping, only advance by the actual amount mapped.
> +            bytes_left_to_map -= segment_mapped;
> +            iova += segment_mapped;
> +        }
> +
> +        let gpuva = context.preallocated_gpuva()?;
> +        let op = op.insert(gpuva, pin_init::init_zeroed());
> +
> +        Ok(op)
> +    }
> +
> +    fn sm_step_unmap<'op>(
> +        &mut self,
> +        op: OpUnmap<'op, Self>,
> +        context: &mut Self::SmContext<'_>,
> +    ) -> Result<OpUnmapped<'op, Self>, Error> {
> +        let start_iova = op.va().addr();
> +        let length = op.va().length();
> +
> +        let region = start_iova..(start_iova + length);
> +        pt_unmap(context.pt, region.clone()).inspect_err(|e| {
> +            pr_err!(
> +                "Failed to unmap region {:#x}..{:#x}: {:?}\n",
> +                region.start,
> +                region.end,
> +                e
> +            );
> +        })?;
> +
> +        let (op_unmapped, _va_removed) = op.remove();
> +
> +        Ok(op_unmapped)
> +    }
> +
> +    fn sm_step_remap<'op>(
> +        &mut self,
> +        op: OpRemap<'op, Self>,
> +        context: &mut Self::SmContext<'_>,
> +    ) -> Result<OpRemapped<'op, Self>, Error> {
> +        let unmap_start = if let Some(prev) = op.prev() {
> +            prev.addr() + prev.length()
> +        } else {
> +            op.va_to_unmap().addr()
> +        };
> +
> +        let unmap_end = if let Some(next) = op.next() {
> +            next.addr()
> +        } else {
> +            op.va_to_unmap().addr() + op.va_to_unmap().length()
> +        };
> +
> +        let unmap_length = unmap_end - unmap_start;
> +
> +        if unmap_length > 0 {
> +            let region = unmap_start..(unmap_start + unmap_length);
> +            pt_unmap(context.pt, region.clone()).inspect_err(|e| {
> +                pr_err!(
> +                    "Failed to unmap remap region {:#x}..{:#x}: {:?}\n",
> +                    region.start,
> +                    region.end,
> +                    e
> +                );
> +            })?;
> +        }
> +
> +        let prev_va = context.preallocated_gpuva()?;
> +        let next_va = context.preallocated_gpuva()?;
> +
> +        let (op_remapped, _remap_ret) = op.remap(
> +            [prev_va, next_va],
> +            pin_init::init_zeroed(),
> +            pin_init::init_zeroed(),
> +        );
> +
> +        Ok(op_remapped)
> +    }
> +}
> +
> +fn mair_to_memattr(mair: u64) -> u64 {
> +    let mut memattr: u64 = 0;
> +
> +    for i in 0..8 {
> +        let in_attr = (mair >> (8 * i)) as u8;
> +        let outer = in_attr >> 4;
> +        let inner = in_attr & 0xf;
> +
> +        // For caching to be enabled, inner and outer caching policy
> +        // have to be both write-back, if one of them is write-through
> +        // or non-cacheable, we just choose non-cacheable. Device
> +        // memory is also translated to non-cacheable.
> +        let out_attr = if (outer & 3 == 0) || (outer & 4 == 0) || (inner & 4 == 0) {
> +            AS_MEMATTR_AARCH64_INNER_OUTER_NC
> +                | AS_MEMATTR_AARCH64_SH_MIDGARD_INNER
> +                | as_memattr_aarch64_inner_alloc_expl(false, false)
> +        } else {
> +            // Use SH_CPU_INNER mode so SH_IS, which is used when
> +            // IOMMU_CACHE is set, actually maps to the standard
> +            // definition of inner-shareable and not Mali's
> +            // internal-shareable mode.
> +            //
> +            // TODO: this assumes a non-coherent system.
> +            AS_MEMATTR_AARCH64_INNER_OUTER_WB
> +                | AS_MEMATTR_AARCH64_SH_MIDGARD_INNER
> +                | as_memattr_aarch64_inner_alloc_expl(inner & 1 != 0, inner & 2 != 0)
> +        };
> +
> +        memattr |= (u64::from(out_attr)) << (8 * i);
> +    }
> +
> +    memattr
> +}
> +
> +// We can map multiple pages at once but we can't exceed the size of the
> +// table entry itself. So, if mapping 4KB pages, figure out how many pages
> +// can be mapped before we hit the 2MB boundary. Or, if mapping 2MB pages,
> +// figure out how many pages can be mapped before hitting the 1GB boundary.
> +// Returns the page size (4KB or 2MB) and the number of pages that can be
> +// mapped at that size.
> +fn get_pgsize(addr: u64, size: u64) -> (u64, u64) {
> +    // Get the distance to the next boundary of 2MB block
> +    let blk_offset_2m = addr.wrapping_neg() % (SZ_2M as u64);
> +
> +    // Use 4K blocks if the address is not 2MB aligned, or we have less than 2MB to map.
> +    if blk_offset_2m != 0 || size < SZ_2M as u64 {
> +        let pgcount = if blk_offset_2m == 0 {
> +            size / SZ_4K as u64
> +        } else {
> +            blk_offset_2m.min(size) / SZ_4K as u64
> +        };
> +        return (SZ_4K as u64, pgcount);
> +    }
> +
> +    let blk_offset_1g = addr.wrapping_neg() % (SZ_1G as u64);
> +    let blk_offset = if blk_offset_1g == 0 {
> +        SZ_1G as u64
> +    } else {
> +        blk_offset_1g
> +    };
> +    let pgcount = blk_offset.min(size) / SZ_2M as u64;
> +
> +    (SZ_2M as u64, pgcount)
> +}
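
Just to double-check my reading of get_pgsize() (assuming I got the
arithmetic right), these are the results I'd expect:

    // 4KB-aligned but not 2MB-aligned start: only the 511 4KB pages up to
    // the next 2MB boundary are covered; pt_map()'s loop continues from there.
    assert_eq!(get_pgsize(0x201000, 2 * SZ_2M as u64), (SZ_4K as u64, 511));

    // 2MB-aligned start with 4MB left to map: two 2MB blocks at once.
    assert_eq!(get_pgsize(0x200000, 2 * SZ_2M as u64), (SZ_2M as u64, 2));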
> +
> +fn pt_map(
> +    pt: &IoPageTable<ARM64LPAES1>,
> +    iova: u64,
> +    paddr: u64,
> +    len: u64,
> +    prot: u32,
> +) -> Result<u64> {
> +    let mut segment_mapped = 0u64;
> +    while segment_mapped < len {
> +        let remaining = len - segment_mapped;
> +        let curr_iova = iova + segment_mapped;
> +        let curr_paddr = paddr + segment_mapped;
> +
> +        let (pgsize, pgcount) = get_pgsize(curr_iova | curr_paddr, remaining);
> +
> +        // SAFETY: Exclusive access to the page table is ensured because
> +        // the pt reference comes from PtUpdateContext, which was
> +        // created while holding &mut Vm, preventing any other access to the
> +        // page table for the duration of this operation.
> +        let (mapped, result) = unsafe {
> +            pt.map_pages(
> +                curr_iova as usize,
> +                (curr_paddr as usize).try_into().unwrap(),
> +                pgsize as usize,
> +                pgcount as usize,
> +                prot,
> +                GFP_KERNEL,
> +            )
> +        };
> +
> +        if let Err(e) = result {
> +            pr_err!("pt.map_pages failed at iova {:#x}: {:?}\n", curr_iova, 
> e);
> +            if segment_mapped > 0 {
> +                pt_unmap(pt, iova..(iova + segment_mapped)).ok();
> +            }
> +            return Err(e);
> +        }
> +
> +        if mapped == 0 {
> +            pr_err!("Failed to map any pages at iova {:#x}\n", curr_iova);
> +            if segment_mapped > 0 {
> +                pt_unmap(pt, iova..(iova + segment_mapped)).ok();
> +            }
> +            return Err(ENOMEM);
> +        }
> +
> +        segment_mapped += mapped as u64;
> +    }
> +
> +    Ok(segment_mapped)
> +}
> +
> +fn pt_unmap(pt: &IoPageTable<ARM64LPAES1>, range: Range<u64>) -> Result {
> +    let mut iova = range.start;
> +    let mut bytes_left_to_unmap = range.end - range.start;
> +
> +    while bytes_left_to_unmap > 0 {
> +        let (pgsize, pgcount) = get_pgsize(iova, bytes_left_to_unmap);
> +
> +        // SAFETY: Exclusive access to the page table is ensured because
> +        // the pt reference comes from PtUpdateContext, which was
> +        // created while holding &mut Vm, preventing any other access to the
> +        // page table for the duration of this operation.
> +        let unmapped = unsafe { pt.unmap_pages(iova as usize, pgsize as usize, pgcount as usize) };
> +
> +        if unmapped == 0 {
> +            pr_err!("Failed to unmap any bytes at iova {:#x}\n", iova);
> +            return Err(EINVAL);
> +        }
> +
> +        bytes_left_to_unmap -= unmapped as u64;
> +        iova += unmapped as u64;
> +    }
> +
> +    Ok(())
> +}
