On Tue, 9 Mar 2021 19:41:27 -0400
Jason Gunthorpe <j...@nvidia.com> wrote:

> On Tue, Mar 09, 2021 at 12:26:07PM -0700, Alex Williamson wrote:
> 
> > In the new series, I think the fault handler becomes (untested):
> > 
> > static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
> > {
> >         struct vm_area_struct *vma = vmf->vma;
> >         struct vfio_pci_device *vdev = vma->vm_private_data;
> >         unsigned long base_pfn, pgoff;
> >         vm_fault_t ret = VM_FAULT_SIGBUS;
> > 
> >         if (vfio_pci_bar_vma_to_pfn(vma, &base_pfn))
> >                 return ret;
> > 
> >         pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;  
> 
> I don't think this math is completely safe, it needs to parse the
> vm_pgoff..
> 
> I'm worried userspace could split/punch/mangle a VMA using
> munmap/mremap/etc. in a way that does update the vm_pgoff but is
> incompatible with the above.

parsing vm_pgoff is done in:

/*
 * Translate a VMA's vm_pgoff into the starting host pfn of the BAR
 * mapping it describes.  Returns 0 and fills *pfn on success, or
 * -EINVAL if the offset does not name a valid, mmap'able BAR.
 */
static int vfio_pci_bar_vma_to_pfn(struct vm_area_struct *vma,
                                   unsigned long *pfn)
{
        struct vfio_pci_device *vdev = vma->vm_private_data;
        struct pci_dev *pdev = vdev->pdev;
        u64 bar_pgoff;
        int bar_index;

        /* The high bits of the file offset encode the region index. */
        bar_index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);

        if (bar_index >= VFIO_PCI_ROM_REGION_INDEX)
                return -EINVAL;
        if (!vdev->bar_mmap_supported[bar_index] || !vdev->barmap[bar_index])
                return -EINVAL;

        /* The low bits are the page offset within that BAR. */
        bar_pgoff = vma->vm_pgoff &
                    ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);

        *pfn = (pci_resource_start(pdev, bar_index) >> PAGE_SHIFT) + bar_pgoff;

        return 0;
}

But given Peter's concern about faulting individual pages, I think the
fault handler becomes:

/*
 * Fault handler for BAR mmaps: on the first fault, populate the entire
 * VMA with pfn mappings rather than just the faulting page.
 *
 * Returns VM_FAULT_NOPAGE when the whole range was inserted, or
 * VM_FAULT_SIGBUS (or the failing insert's code) otherwise.
 */
static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct vfio_pci_device *vdev = vma->vm_private_data;
        unsigned long vaddr, pfn;
        /* Default result if the pfn lookup fails or memory is disabled. */
        vm_fault_t ret = VM_FAULT_SIGBUS;

        /* Parse vm_pgoff into the BAR's starting pfn for this VMA. */
        if (vfio_pci_bar_vma_to_pfn(vma, &pfn))
                return ret;

        /*
         * Hold memory_lock (read) across the inserts so the device's
         * memory-enable state cannot change mid-population.
         * NOTE(review): assumes the writer side of memory_lock is taken
         * wherever the command register's memory bit is toggled — confirm.
         */
        down_read(&vdev->memory_lock);

        if (__vfio_pci_memory_enabled(vdev)) {
                /* Map the whole VMA one page at a time from the base pfn. */
                for (vaddr = vma->vm_start;
                     vaddr < vma->vm_end; vaddr += PAGE_SIZE, pfn++) {
                        ret = vmf_insert_pfn_prot(vma, vaddr, pfn,
                                        pgprot_decrypted(vma->vm_page_prot));
                        if (ret != VM_FAULT_NOPAGE) {
                                /*
                                 * Partial failure: tear down everything
                                 * inserted so far so a later fault retries
                                 * from a clean slate, and report the error.
                                 */
                                zap_vma_ptes(vma, vma->vm_start,
                                             vaddr - vma->vm_start);
                                break;
                        }
                }
        }

        up_read(&vdev->memory_lock);

        return ret;
}

Thanks,
Alex

Reply via email to