Re: [PATCH] vfio/type1: Empty batch for pfnmap pages

2021-03-25 Thread Alex Williamson
On Wed, 24 Mar 2021 21:05:52 -0400
Daniel Jordan wrote:

> When vfio_pin_pages_remote() returns with a partial batch consisting of
> a single VM_PFNMAP pfn, a subsequent call will unfortunately try
> restoring it from batch->pages, resulting in vfio mapping the wrong page
> and unbalancing the page refcount.
> 
> Prevent the function from returning with this kind of partial batch to
> avoid the issue.  There's no explicit check for a VM_PFNMAP pfn because
> it's awkward to do so, so infer it from characteristics of the batch
> instead.  This may result in occasional false positives but keeps the
> code simpler.
> 
> Fixes: 4d83de6da265 ("vfio/type1: Batch page pinning")
> Link: https://lkml.kernel.org/r/20210323133254.33ed9...@omen.home.shazbot.org/
> Reported-by: Alex Williamson 
> Suggested-by: Alex Williamson 
> Signed-off-by: Daniel Jordan 
> ---
> 
> Alex, I couldn't immediately find a way to trigger this bug, but I can
> run your test case if you like.
> 
> This is the minimal fix, but it should still protect all calls of
> vfio_batch_unpin() from this problem.

Thanks, applied to my for-linus branch for v5.12.  The attached unit
test triggers the issue, I don't have any real world examples and was
only just experimenting with this for another series earlier this week.
Thanks,

Alex
/*
 * Alternate pages of device memory and anonymous memory within a single DMA
 * mapping.
 *
 * Run with argv[1] as a fully specified PCI device already bound to vfio-pci.
 * ex. "alternate-pfnmap 0000:01:00.0"
 */
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <unistd.h>

#include <linux/pci_regs.h>
#include <linux/types.h>
#include <linux/vfio.h>

/*
 * Program-wide state.  Neither variable is referenced by the helper
 * functions visible below.
 * NOTE(review): presumably the address and length of the DMA mapping the
 * test sets up -- confirm against the rest of the program.
 */
void *vaddr = (void *)0x1;
size_t map_size = 0;

/*
 * Open the VFIO container node, /dev/vfio/vfio, for reading and writing.
 *
 * Returns the descriptor from open().  When open() fails, the failure is
 * reported on stderr and the negative value it returned is passed back.
 */
int get_container(void)
{
	int fd;

	fd = open("/dev/vfio/vfio", O_RDWR);
	if (fd >= 0)
		return fd;

	fprintf(stderr, "Failed to open /dev/vfio/vfio, %d (%s)\n",
		fd, strerror(errno));
	return fd;
}

/*
 * Open the VFIO group that contains the given PCI device.
 *
 * name: PCI address in "ssss:bb:dd.f" form (segment:bus:slot.function).
 *
 * The device's sysfs "iommu_group" symlink is read to learn the group number,
 * /dev/vfio/<group> is opened, and the group status is queried to make sure
 * the group is viable.
 *
 * Returns the open group file descriptor on success.  On failure a message
 * is printed to stderr and a negative value is returned; a group descriptor
 * that was already opened is closed before returning.
 */
int get_group(char *name)
{
	unsigned int seg, bus, slot;
	int func;
	int ret, group, groupid;
	char path[50], iommu_group_path[50], *group_name;
	struct stat st;
	ssize_t len;
	struct vfio_group_status group_status = {
		.argsz = sizeof(group_status)
	};

	ret = sscanf(name, "%04x:%02x:%02x.%d", &seg, &bus, &slot, &func);
	if (ret != 4) {
		fprintf(stderr, "Invalid device\n");
		return -EINVAL;
	}

	snprintf(path, sizeof(path),
		 "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
		 seg, bus, slot, (unsigned int)func);

	ret = stat(path, &st);
	if (ret < 0) {
		fprintf(stderr, "No such device\n");
		return ret;
	}

	strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);

	/*
	 * readlink() does not NUL-terminate and may fill the whole buffer, so
	 * leave one byte free for the terminator written below.
	 */
	len = readlink(path, iommu_group_path, sizeof(iommu_group_path) - 1);
	if (len <= 0) {
		fprintf(stderr, "No iommu_group for device\n");
		return -EINVAL;
	}

	iommu_group_path[len] = 0;
	group_name = basename(iommu_group_path);

	if (sscanf(group_name, "%d", &groupid) != 1) {
		fprintf(stderr, "Unknown group\n");
		return -EINVAL;
	}

	snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
	group = open(path, O_RDWR);
	if (group < 0) {
		fprintf(stderr, "Failed to open %s, %d (%s)\n",
		   path, group, strerror(errno));
		return group;
	}

	ret = ioctl(group, VFIO_GROUP_GET_STATUS, &group_status);
	if (ret) {
		fprintf(stderr, "ioctl(VFIO_GROUP_GET_STATUS) failed\n");
		close(group);	/* do not leak the descriptor on failure */
		return ret;
	}

	if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
		fprintf(stderr,
			"Group not viable, all devices attached to vfio?\n");
		close(group);	/* do not leak the descriptor on failure */
		return -1;
	}

	return group;
}

/*
 * Attach a VFIO group to a container.
 *
 * group:     open group file descriptor.
 * container: open container file descriptor; VFIO_GROUP_SET_CONTAINER takes
 *            a pointer to it, so its address is passed to ioctl().
 *
 * Returns the ioctl() result: 0 on success, non-zero on failure (a message
 * is also printed to stderr).
 */
int group_set_container(int group, int container)
{
	int ret = ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);

	if (ret)
		fprintf(stderr, "Failed to set group container\n");

	return ret;
}

/*
 * Issue VFIO_SET_IOMMU on the container, requesting VFIO_TYPE1_IOMMU.
 *
 * Returns whatever ioctl() returned; a non-zero result is also reported on
 * stderr.
 */
int container_set_iommu(int container)
{
	int rc;

	rc = ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
	if (rc != 0)
		fprintf(stderr, "Failed to set IOMMU\n");

	return rc;
}

/*
 * Issue VFIO_GROUP_GET_DEVICE_FD on the group for the named device.
 *
 * group: open group file descriptor.
 * name:  device name string handed to the ioctl unchanged.
 *
 * Returns the ioctl() result; a negative result is also reported on stderr.
 */
int group_get_device(int group, char *name)
{
	int fd;

	fd = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, name);
	if (fd >= 0)
		return fd;

	fprintf(stderr, "Failed to get device\n");
	return fd;
}

void *mmap_device_page(int device, int prot)
{
	struct vfio_region_info config_info = {
		.argsz = sizeof(config_info),
		.index = VFIO_PCI_CONFIG_REGION_INDEX
	};
	struct vfio_region_info region_info = {
		.argsz = sizeof(region_info)
	};
	void *map = MAP_FAILED;
	unsigned int bar;
	int i, ret;

	ret = ioctl(device, VFIO_DEVICE_GET_REGION_INFO, _info);
	if (ret) {
		fprintf(stderr, "Failed to get config space region info\n");
		return map;
	}

	for (i = 0; i < 6; i++) {
		if (pread(device, , sizeof(bar), config_info.offset +
			  PCI_BASE_ADDRESS_0 + (4 * i)) != sizeof(bar)) {
			fprintf(stderr, "Error reading BAR%d\n", i);
			return map;
		}

		if (!(bar & PCI_BASE_ADDRESS_SPACE)) {
			break;
tryagain:
			if (bar & PCI_BASE_ADDRESS_MEM_TYPE_64)
i++;
		}
	}

	if (i >= 6) {
		fprintf(stderr, "No memory BARs found\n");
		return map;
	}

	region_info.index = VFIO_PCI_BAR0_REGION_INDEX + i;
	ret = ioctl(device, VFIO_DEVICE_GET_REGION_INFO, _info);
	if (ret) {
		

[PATCH] vfio/type1: Empty batch for pfnmap pages

2021-03-24 Thread Daniel Jordan
When vfio_pin_pages_remote() returns with a partial batch consisting of
a single VM_PFNMAP pfn, a subsequent call will unfortunately try
restoring it from batch->pages, resulting in vfio mapping the wrong page
and unbalancing the page refcount.

Prevent the function from returning with this kind of partial batch to
avoid the issue.  There's no explicit check for a VM_PFNMAP pfn because
it's awkward to do so, so infer it from characteristics of the batch
instead.  This may result in occasional false positives but keeps the
code simpler.

Fixes: 4d83de6da265 ("vfio/type1: Batch page pinning")
Link: https://lkml.kernel.org/r/20210323133254.33ed9...@omen.home.shazbot.org/
Reported-by: Alex Williamson 
Suggested-by: Alex Williamson 
Signed-off-by: Daniel Jordan 
---

Alex, I couldn't immediately find a way to trigger this bug, but I can
run your test case if you like.

This is the minimal fix, but it should still protect all calls of
vfio_batch_unpin() from this problem.

 drivers/vfio/vfio_iommu_type1.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index be07664a..45cbfd4879a5 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -739,6 +739,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	ret = vfio_lock_acct(dma, lock_acct, false);
 
 unpin_out:
+	if (batch->size == 1 && !batch->offset) {
+		/* May be a VM_PFNMAP pfn, which the batch can't remember. */
+		put_pfn(pfn, dma->prot);
+		batch->size = 0;
+	}
+
 	if (ret < 0) {
 		if (pinned && !rsvd) {
 			for (pfn = *pfn_base ; pinned ; pfn++, pinned--)

base-commit: 84196390620ac0e5070ae36af84c137c6216a7dc
-- 
2.31.0