[PATCH] VMCI: Update maintainers for VMCI

2022-02-27 Thread Jorgen Hansen
Remove myself as maintainer for the VMCI driver, and add Bryan
and Rajesh.

Acked-by: Rajesh Jalisatgi 
Acked-by: Bryan Tan 
Acked-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bb6c9b5a3253..ecf22b62161e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -20717,7 +20717,8 @@ S:  Supported
 F: drivers/ptp/ptp_vmw.c
 
 VMWARE VMCI DRIVER
-M: Jorgen Hansen 
+M: Bryan Tan 
+M: Rajesh Jalisatgi 
 M: Vishnu Dasa 
 L: linux-ker...@vger.kernel.org
 L: pv-driv...@vmware.com (private)
-- 
2.25.1



[PATCH v3 8/8] VMCI: dma dg: add support for DMA datagrams receive

2022-02-07 Thread Jorgen Hansen
Use the DMA based receive operation instead of the ioread8_rep
based datagram receive when DMA datagrams are supported.

In the receive operation, configure the header to point to the
page-aligned VMCI_MAX_DG_SIZE part of the receive buffer using an
S/G configuration for the header. This ensures that the existing
dispatch routine can be used with little modification. Initiate the
receive by writing the lower 32 bits of the buffer address to the
VMCI_DATA_IN_LOW_ADDR register, and wait for the busy flag to be
changed by the device using a wait queue.

The existing dispatch routine for received datagrams is reused
for DMA datagrams with a few modifications:
- the receive buffer is always the maximum size for DMA datagrams
  (the IO port path first tries a shorter buffer to reduce the
  overhead of the ioread8_rep operation);
- for DMA datagrams, datagrams are laid out contiguously in the
  buffer, as opposed to IO port datagrams, which can start on any
  page boundary (see the illustrative sketch below).
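For illustration only (not part of this patch), the contiguous layout can
be modelled in plain C as below. The field names, the 8-byte alignment of
each datagram, and the termination check are assumptions made for the
sketch; the real logic lives in vmci_dispatch_dgs() in the diff further
down.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct vmci_datagram (header + inline payload). */
struct dg_header {
	uint64_t dst;
	uint64_t src;
	uint64_t payload_size;
};

/* Walk datagrams that sit back to back in a single receive buffer. */
static void dispatch_contiguous(const uint8_t *buf, size_t bytes_read)
{
	size_t off = 0;

	while (off + sizeof(struct dg_header) <= bytes_read) {
		struct dg_header hdr;

		memcpy(&hdr, buf + off, sizeof(hdr));
		if (hdr.dst == 0 && hdr.payload_size == 0)
			break;	/* nothing more in this buffer */

		printf("datagram with %llu payload bytes\n",
		       (unsigned long long)hdr.payload_size);

		/* The next datagram follows immediately, rounded up to 8 bytes. */
		off += (size_t)((sizeof(hdr) + hdr.payload_size + 7) & ~7ULL);
	}
}

int main(void)
{
	uint8_t buf[128] = { 0 };
	struct dg_header first = { .dst = 1, .src = 2, .payload_size = 5 };

	memcpy(buf, &first, sizeof(first));
	memcpy(buf + sizeof(first), "hello", 5);
	dispatch_contiguous(buf, sizeof(buf));
	return 0;
}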

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 103 ++---
 1 file changed, 79 insertions(+), 24 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index bf524217914e..aa61a687b3e2 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -58,6 +58,7 @@ struct vmci_guest_device {
 
struct tasklet_struct datagram_tasklet;
struct tasklet_struct bm_tasklet;
+   struct wait_queue_head inout_wq;
 
void *data_buffer;
dma_addr_t data_buffer_base;
@@ -115,6 +116,36 @@ static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
iowrite32(val, dev->iobase + reg);
 }
 
+static void vmci_read_data(struct vmci_guest_device *vmci_dev,
+  void *dest, size_t size)
+{
+   if (vmci_dev->mmio_base == NULL)
+   ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+   dest, size);
+   else {
+   /*
+    * For DMA datagrams, the data_buffer will contain the header on the
+    * first page, followed by the incoming datagram(s) on the following
+    * pages. The header uses an S/G element immediately following the
+    * header on the first page to point to the data area.
+    */
+   struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
+   struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
+   size_t buffer_offset = dest - vmci_dev->data_buffer;
+
+   buffer_header->opcode = 1;
+   buffer_header->size = 1;
+   buffer_header->busy = 0;
+   sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
+   sg_array[0].size = size;
+
+   vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
+                  VMCI_DATA_IN_LOW_ADDR);
+
+   wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
+   }
+}
+
 static int vmci_write_data(struct vmci_guest_device *dev,
   struct vmci_datagram *dg)
 {
@@ -261,15 +292,17 @@ static int vmci_check_host_caps(struct pci_dev *pdev)
 }
 
 /*
- * Reads datagrams from the data in port and dispatches them. We
- * always start reading datagrams into only the first page of the
- * datagram buffer. If the datagrams don't fit into one page, we
- * use the maximum datagram buffer size for the remainder of the
- * invocation. This is a simple heuristic for not penalizing
- * small datagrams.
+ * Reads datagrams from the device and dispatches them. For IO port
+ * based access to the device, we always start reading datagrams into
+ * only the first page of the datagram buffer. If the datagrams don't
+ * fit into one page, we use the maximum datagram buffer size for the
+ * remainder of the invocation. This is a simple heuristic for not
+ * penalizing small datagrams. For DMA-based datagrams, we always
+ * use the maximum datagram buffer size, since there is no performance
+ * penalty for doing so.
  *
  * This function assumes that it has exclusive access to the data
- * in port for the duration of the call.
+ * in register(s) for the duration of the call.
  */
 static void vmci_dispatch_dgs(unsigned long data)
 {
@@ -277,23 +310,41 @@ static void vmci_dispatch_dgs(unsigned long data)
u8 *dg_in_buffer = vmci_dev->data_buffer;
struct vmci_datagram *dg;
size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
-   size_t current_dg_in_buffer_size = PAGE_SIZE;
+   size_t current_dg_in_buffer_size;
size_t remaining_bytes;
+   bool is_io_port = vmci_dev->mmio_base == NULL;
 
BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
 
-   ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN

[PATCH v3 7/8] VMCI: dma dg: add support for DMA datagrams sends

2022-02-07 Thread Jorgen Hansen
Use a DMA-based send operation from the transmit buffer instead of
the iowrite8_rep based datagram send when DMA datagrams are supported.

The outgoing datagram is sent as inline data in the VMCI transmit
buffer. Once the header has been configured, the send is initiated
by writing the lower 32 bits of the buffer base address to the
VMCI_DATA_OUT_LOW_ADDR register. Only then will the device process
the header and the datagram itself. Following that, the driver busy
waits (it isn't possible to sleep on the send path) for the header
busy flag to change, indicating that the send is complete.
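For context only (this caller is not part of the patch), a minimal sketch
of how driver-internal code builds a datagram and hands it to
vmci_send_datagram(). It assumes the code sits inside
drivers/misc/vmw_vmci/ (hence the vmci_driver.h include); the handles and
payload are placeholders.

/*
 * Illustrative sketch only -- not part of this patch.
 */
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmw_vmci_defs.h>

#include "vmci_driver.h"

static int example_send(struct vmci_handle dst, struct vmci_handle src,
			const void *payload, size_t payload_size)
{
	struct vmci_datagram *dg;
	int result;

	/* The fixed header is followed immediately by the inline payload. */
	dg = kzalloc(sizeof(*dg) + payload_size, GFP_KERNEL);
	if (!dg)
		return VMCI_ERROR_NO_MEM;

	dg->dst = dst;
	dg->src = src;
	dg->payload_size = payload_size;
	memcpy(dg + 1, payload, payload_size);

	/* Same entry point for IO port and DMA datagram devices. */
	result = vmci_send_datagram(dg);

	kfree(dg);
	return result;
}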

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 45 --
 include/linux/vmw_vmci_defs.h  | 34 ++
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 36eade15ba87..bf524217914e 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -114,6 +115,47 @@ static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
iowrite32(val, dev->iobase + reg);
 }
 
+static int vmci_write_data(struct vmci_guest_device *dev,
+  struct vmci_datagram *dg)
+{
+   int result;
+
+   if (dev->mmio_base != NULL) {
+   struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
+   u8 *dg_out_buffer = (u8 *)(buffer_header + 1);
+
+   if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
+   return VMCI_ERROR_INVALID_ARGS;
+
+   /*
+* Initialize send buffer with outgoing datagram
+* and set up header for inline data. Device will
+* not access buffer asynchronously - only after
+* the write to VMCI_DATA_OUT_LOW_ADDR.
+*/
+   memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
+   buffer_header->opcode = 0;
+   buffer_header->size = VMCI_DG_SIZE(dg);
+   buffer_header->busy = 1;
+
+   vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
+  VMCI_DATA_OUT_LOW_ADDR);
+
+   /* Caller holds a spinlock, so cannot block. */
+   spin_until_cond(buffer_header->busy == 0);
+
+   result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
+   if (result == VMCI_SUCCESS)
+   result = (int)buffer_header->result;
+   } else {
+   iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
+dg, VMCI_DG_SIZE(dg));
+   result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
+   }
+
+   return result;
+}
+
 /*
  * VM to hypervisor call mechanism. We use the standard VMware naming
  * convention since shared code is calling this function as well.
@@ -139,8 +181,7 @@ int vmci_send_datagram(struct vmci_datagram *dg)
 spin_lock_irqsave(&vmci_dev_spinlock, flags);
 
if (vmci_dev_g) {
-   iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
-dg, VMCI_DG_SIZE(dg));
+   vmci_write_data(vmci_dev_g, dg);
result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
} else {
result = VMCI_ERROR_UNAVAILABLE;
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 8bc37d8244a8..6fb663b36f72 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -110,6 +110,40 @@ enum {
 #define VMCI_MMIO_ACCESS_OFFSET        ((size_t)(128 * 1024))
 #define VMCI_MMIO_ACCESS_SIZE  ((size_t)(64 * 1024))
 
+/*
+ * For VMCI devices supporting the VMCI_CAPS_DMA_DATAGRAM capability, the
+ * sending and receiving of datagrams can be performed using DMA to/from
+ * a driver allocated buffer.
+ * Sending and receiving will be handled as follows:
+ * - when sending datagrams, the driver initializes the buffer where the
+ *   data part will refer to the outgoing VMCI datagram, sets the busy flag
+ *   to 1 and writes the address of the buffer to VMCI_DATA_OUT_HIGH_ADDR
+ *   and VMCI_DATA_OUT_LOW_ADDR. Writing to VMCI_DATA_OUT_LOW_ADDR triggers
+ *   the device processing of the buffer. When the device has processed the
+ *   buffer, it will write the result value to the buffer and then clear the
+ *   busy flag.
+ * - when receiving datagrams, the driver initializes the buffer where the
+ *   data part will describe the receive buffer, clears the busy flag and
+ *   writes the address of the buffer to VMCI_DATA_IN_HIGH_ADDR and
+ *   VMCI_DATA_IN_LOW_ADDR. Writing to VMCI_DATA_IN_LOW_ADDR triggers the
+ *   device processing of the buffer. The device will copy as many available
+ *   datagrams into t

[PATCH v3 5/8] VMCI: dma dg: register dummy IRQ handlers for DMA datagrams

2022-02-07 Thread Jorgen Hansen
Register dummy interrupt handlers for DMA datagrams in preparation for
DMA datagram receive operations.
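The handlers stay empty at this point in the series. For illustration only
(an assumption on my part, not this patch), once the inout_wq wait queue
lands in patch 8/8, such a handler would plausibly do little more than wake
the thread waiting on the header's busy flag:

/*
 * Illustrative sketch only -- not part of this patch. Assumes the
 * inout_wq wait queue added later in the series (patch 8/8).
 */
static irqreturn_t example_dma_datagram_irq(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* Wake whoever is blocked in wait_event() on the busy flag. */
	wake_up(&dev->inout_wq);

	return IRQ_HANDLED;
}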

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 42 +++---
 include/linux/vmw_vmci_defs.h  | 14 --
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index ced187e7ac08..acef19c562b3 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -414,6 +414,9 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
icr &= ~VMCI_ICR_NOTIFICATION;
}
 
+   if (icr & VMCI_ICR_DMA_DATAGRAM)
+   icr &= ~VMCI_ICR_DMA_DATAGRAM;
+
if (icr != 0)
dev_warn(dev->dev,
 "Ignoring unknown interrupt cause (%d)\n",
@@ -438,6 +441,16 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
return IRQ_HANDLED;
 }
 
+/*
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
+ * which is for the completion of a DMA datagram send or receive operation.
+ * Will only get called if we are using MSI-X with exclusive vectors.
+ */
+static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
+{
+   return IRQ_HANDLED;
+}
+
 /*
  * Most of the initialization at module load time is done here.
  */
@@ -447,6 +460,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
struct vmci_guest_device *vmci_dev;
void __iomem *iobase = NULL;
void __iomem *mmio_base = NULL;
+   unsigned int num_irq_vectors;
unsigned int capabilities;
unsigned int caps_in_use;
unsigned long cmd;
@@ -627,8 +641,12 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 * Enable interrupts.  Try MSI-X first, then MSI, and then fallback on
 * legacy interrupts.
 */
-   error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
-   PCI_IRQ_MSIX);
+   if (vmci_dev->mmio_base != NULL)
+   num_irq_vectors = VMCI_MAX_INTRS;
+   else
+   num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
+   error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
+ PCI_IRQ_MSIX);
if (error < 0) {
error = pci_alloc_irq_vectors(pdev, 1, 1,
PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
@@ -666,6 +684,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
pci_irq_vector(pdev, 1), error);
goto err_free_irq;
}
+   if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
+   error = request_irq(pci_irq_vector(pdev, 2),
+   vmci_interrupt_dma_datagram,
+   0, KBUILD_MODNAME, vmci_dev);
+   if (error) {
+   dev_err(&pdev->dev,
+   "Failed to allocate irq %u: %d\n",
+   pci_irq_vector(pdev, 2), error);
+   goto err_free_bm_irq;
+   }
+   }
}
 
 dev_dbg(&pdev->dev, "Registered device\n");
@@ -676,6 +705,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
cmd = VMCI_IMR_DATAGRAM;
if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
cmd |= VMCI_IMR_NOTIFICATION;
+   if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
+   cmd |= VMCI_IMR_DMA_DATAGRAM;
vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);
 
/* Enable interrupts. */
@@ -686,6 +717,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_call_vsock_callback(false);
return 0;
 
+err_free_bm_irq:
+   free_irq(pci_irq_vector(pdev, 1), vmci_dev);
 err_free_irq:
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
 tasklet_kill(&vmci_dev->datagram_tasklet);
@@ -751,8 +784,11 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
 * MSI-X, we might have multiple vectors, each with their own
 * IRQ, which we must free too.
 */
-   if (vmci_dev->exclusive_vectors)
+   if (vmci_dev->exclusive_vectors) {
free_irq(pci_irq_vector(pdev, 1), vmci_dev);
+   if (vmci_dev->mmio_base != NULL)
+   free_irq(pci_irq_vector(pdev, 2), vmci_dev);
+   }
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
pci_free_irq_vectors(pdev);
 
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 4167779469fd..2b70c024dacb 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/v

[PATCH v3 4/8] VMCI: dma dg: set OS page size

2022-02-07 Thread Jorgen Hansen
Tell the device the page size used by the OS.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 4 
 include/linux/vmw_vmci_defs.h  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index b93afe7f7119..ced187e7ac08 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -578,6 +578,10 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
/* Let the host know which capabilities we intend to use. */
vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
 
+   /* Let the device know the size for pages passed down. */
+   if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
+   vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
+
/* Set up global device so that we can start sending datagrams */
 spin_lock_irq(&vmci_dev_spinlock);
vmci_dev_g = vmci_dev;
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 1ce2cffdc3ae..4167779469fd 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -21,6 +21,7 @@
 #define VMCI_CAPS_ADDR  0x18
 #define VMCI_RESULT_LOW_ADDR    0x1c
 #define VMCI_RESULT_HIGH_ADDR   0x20
+#define VMCI_GUEST_PAGE_SHIFT   0x34
 
 /* Max number of devices. */
 #define VMCI_MAX_DEVICES 1
-- 
2.25.1



[PATCH v3 3/8] VMCI: dma dg: detect DMA datagram capability

2022-02-07 Thread Jorgen Hansen
Detect the VMCI DMA datagram capability, and if present, ack it
to the device.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 11 +++
 include/linux/vmw_vmci_defs.h  |  1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index d30d66258e52..b93afe7f7119 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -562,6 +562,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
}
}
 
+   if (mmio_base != NULL) {
+   if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
+   caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
+   } else {
+   dev_err(&pdev->dev,
+   "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
+   error = -ENXIO;
+   goto err_free_data_buffer;
+   }
+   }
+
dev_info(>dev, "Using capabilities 0x%x\n", caps_in_use);
 
/* Let the host know which capabilities we intend to use. */
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 8fc00e2685cf..1ce2cffdc3ae 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -39,6 +39,7 @@
 #define VMCI_CAPS_DATAGRAM  BIT(2)
 #define VMCI_CAPS_NOTIFICATIONS BIT(3)
 #define VMCI_CAPS_PPN64 BIT(4)
+#define VMCI_CAPS_DMA_DATAGRAM  BIT(5)
 
 /* Interrupt Cause register bits. */
 #define VMCI_ICR_DATAGRAM  BIT(0)
-- 
2.25.1



[PATCH v3 6/8] VMCI: dma dg: allocate send and receive buffers for DMA datagrams

2022-02-07 Thread Jorgen Hansen
If DMA datagrams are used, allocate send and receive buffers
in coherent DMA memory.

This is done in preparation for the send and receive datagram
operations, where the buffers are used for the exchange of data
between driver and device.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 71 ++
 include/linux/vmw_vmci_defs.h  |  4 ++
 2 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index acef19c562b3..36eade15ba87 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -31,6 +31,12 @@
 
 #define VMCI_UTIL_NUM_RESOURCES 1
 
+/*
+ * Datagram buffers for DMA send/receive must accommodate at least
+ * a maximum sized datagram and the header.
+ */
+#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)
+
 static bool vmci_disable_msi;
 module_param_named(disable_msi, vmci_disable_msi, bool, 0);
 MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
@@ -53,6 +59,9 @@ struct vmci_guest_device {
struct tasklet_struct bm_tasklet;
 
void *data_buffer;
+   dma_addr_t data_buffer_base;
+   void *tx_buffer;
+   dma_addr_t tx_buffer_base;
void *notification_bitmap;
dma_addr_t notification_base;
 };
@@ -451,6 +460,24 @@ static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
return IRQ_HANDLED;
 }
 
+static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
+{
+   if (vmci_dev->mmio_base != NULL) {
+   if (vmci_dev->tx_buffer != NULL)
+   dma_free_coherent(vmci_dev->dev,
+ VMCI_DMA_DG_BUFFER_SIZE,
+ vmci_dev->tx_buffer,
+ vmci_dev->tx_buffer_base);
+   if (vmci_dev->data_buffer != NULL)
+   dma_free_coherent(vmci_dev->dev,
+ VMCI_DMA_DG_BUFFER_SIZE,
+ vmci_dev->data_buffer,
+ vmci_dev->data_buffer_base);
+   } else {
+   vfree(vmci_dev->data_buffer);
+   }
+}
+
 /*
  * Most of the initialization at module load time is done here.
  */
@@ -517,11 +544,27 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 tasklet_init(&vmci_dev->bm_tasklet,
 vmci_process_bitmap, (unsigned long)vmci_dev);
 
-   vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+   if (mmio_base != NULL) {
+           vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
+                                                    &vmci_dev->tx_buffer_base,
+                                                    GFP_KERNEL);
+           if (!vmci_dev->tx_buffer) {
+                   dev_err(&pdev->dev,
+                           "Can't allocate memory for datagram tx buffer\n");
+                   return -ENOMEM;
+           }
+
+           vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
+                                                      &vmci_dev->data_buffer_base,
+                                                      GFP_KERNEL);
+   } else {
+           vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+   }
    if (!vmci_dev->data_buffer) {
            dev_err(&pdev->dev,
                    "Can't allocate memory for datagram buffer\n");
-   return -ENOMEM;
+   error = -ENOMEM;
+   goto err_free_data_buffers;
}
 
pci_set_master(pdev);   /* To enable queue_pair functionality. */
@@ -539,7 +582,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
dev_err(>dev, "Device does not support datagrams\n");
error = -ENXIO;
-   goto err_free_data_buffer;
+   goto err_free_data_buffers;
}
caps_in_use = VMCI_CAPS_DATAGRAM;
 
@@ -583,7 +626,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 dev_err(&pdev->dev,
"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
error = -ENXIO;
-   goto err_free_data_buffer;
+   goto err_free_data_buffers;
}
}
 
@@ -592,10 +635,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
/* Let the host know which capabilities we intend to use. */
vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
 
-   /* Let the device know the size for pages passed down. */
-   if (caps_in_use & VMCI_CAPS_DM

[PATCH v3 2/8] VMCI: dma dg: add MMIO access to registers

2022-02-07 Thread Jorgen Hansen
Detect the support for MMIO access through examination of the length
of the region requested in BAR1. If it is 256KB, the VMCI device
supports MMIO access to registers.

If MMIO access is supported, map the area of the region used for
MMIO access (64KB size at offset 128KB).

Add wrapper functions for 32 bit register accesses through
either MMIO or IO ports, based on device configuration.

Sending and receiving datagrams through iowrite8_rep/ioread8_rep is
left unchanged for now, and will be addressed in a later change.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 67 +-
 include/linux/vmw_vmci_defs.h  | 12 ++
 2 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 1018dc77269d..d30d66258e52 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -45,6 +45,7 @@ static u32 vm_context_id = VMCI_INVALID_ID;
 struct vmci_guest_device {
struct device *dev; /* PCI device we are attached to */
void __iomem *iobase;
+   void __iomem *mmio_base;
 
bool exclusive_vectors;
 
@@ -89,6 +90,21 @@ u32 vmci_get_vm_context_id(void)
return vm_context_id;
 }
 
+static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
+{
+   if (dev->mmio_base != NULL)
+   return readl(dev->mmio_base + reg);
+   return ioread32(dev->iobase + reg);
+}
+
+static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
+{
+   if (dev->mmio_base != NULL)
+   writel(val, dev->mmio_base + reg);
+   else
+   iowrite32(val, dev->iobase + reg);
+}
+
 /*
  * VM to hypervisor call mechanism. We use the standard VMware naming
  * convention since shared code is calling this function as well.
@@ -116,7 +132,7 @@ int vmci_send_datagram(struct vmci_datagram *dg)
if (vmci_dev_g) {
iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
 dg, VMCI_DG_SIZE(dg));
-   result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
+   result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
} else {
result = VMCI_ERROR_UNAVAILABLE;
}
@@ -384,7 +400,7 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
unsigned int icr;
 
/* Acknowledge interrupt and determine what needs doing. */
-   icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
+   icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
if (icr == 0 || icr == ~0)
return IRQ_NONE;
 
@@ -429,7 +445,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
   const struct pci_device_id *id)
 {
struct vmci_guest_device *vmci_dev;
-   void __iomem *iobase;
+   void __iomem *iobase = NULL;
+   void __iomem *mmio_base = NULL;
unsigned int capabilities;
unsigned int caps_in_use;
unsigned long cmd;
@@ -445,16 +462,29 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
return error;
}
 
-   error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
-   if (error) {
-   dev_err(>dev, "Failed to reserve/map IO regions\n");
-   return error;
-   }
+   /*
+* The VMCI device with mmio access to registers requests 256KB
+* for BAR1. If present, driver will use new VMCI device
+* functionality for register access and datagram send/recv.
+*/
 
-   iobase = pcim_iomap_table(pdev)[0];
+   if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
+   dev_info(>dev, "MMIO register access is available\n");
+   mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
+   VMCI_MMIO_ACCESS_SIZE);
+   /* If the map fails, we fall back to IOIO access. */
+   if (!mmio_base)
+   dev_warn(>dev, "Failed to map MMIO register 
access\n");
+   }
 
-   dev_info(>dev, "Found VMCI PCI device at %#lx, irq %u\n",
-(unsigned long)iobase, pdev->irq);
+   if (!mmio_base) {
+   error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
+   if (error) {
+   dev_err(>dev, "Failed to reserve/map IO 
regions\n");
+   return error;
+   }
+   iobase = pcim_iomap_table(pdev)[0];
+   }
 
 vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
if (!vmci_dev) {
@@ -466,6 +496,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 vmci_dev->dev = &pdev->dev;
v

[PATCH v3 1/8] VMCI: dma dg: whitespace formatting change for vmci register defines

2022-02-07 Thread Jorgen Hansen
Update formatting of existing register defines in preparation for
adding additional register definitions for the VMCI device.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 include/linux/vmw_vmci_defs.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index e36cb114c188..9911ecfc18ba 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -12,15 +12,15 @@
 #include 
 
 /* Register offsets. */
-#define VMCI_STATUS_ADDR      0x00
-#define VMCI_CONTROL_ADDR     0x04
-#define VMCI_ICR_ADDR         0x08
-#define VMCI_IMR_ADDR         0x0c
-#define VMCI_DATA_OUT_ADDR    0x10
-#define VMCI_DATA_IN_ADDR     0x14
-#define VMCI_CAPS_ADDR        0x18
-#define VMCI_RESULT_LOW_ADDR  0x1c
-#define VMCI_RESULT_HIGH_ADDR 0x20
+#define VMCI_STATUS_ADDR        0x00
+#define VMCI_CONTROL_ADDR       0x04
+#define VMCI_ICR_ADDR           0x08
+#define VMCI_IMR_ADDR           0x0c
+#define VMCI_DATA_OUT_ADDR      0x10
+#define VMCI_DATA_IN_ADDR       0x14
+#define VMCI_CAPS_ADDR          0x18
+#define VMCI_RESULT_LOW_ADDR    0x1c
+#define VMCI_RESULT_HIGH_ADDR   0x20
 
 /* Max number of devices. */
 #define VMCI_MAX_DEVICES 1
-- 
2.25.1



[PATCH v3 0/8] VMCI: dma dg: Add support for DMA datagrams

2022-02-07 Thread Jorgen Hansen
A new version of the VMCI device will introduce two new major changes:
- support MMIO access to device registers
- support send/receive of datagrams using DMA transfers instead of
  ioread8_rep/iowrite8_rep operations
This patch series updates the VMCI driver to support these new
features while maintaining backwards compatibility.

The DMA based datagram operations use a send and a receive buffer
allocated at module load time. The buffer contains a header
describing the layout of the buffer followed by either an SG list or
inline data. The header also contains a flag indicating whether the
buffer is currently owned by the driver or the device. Both for send
and receive, the driver will initialize the buffer, transfer ownership
to the device by writing the buffer address to a register, and then
wait for the ownership to be transferred back. The device will
generate an interrupt when this happens.
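For orientation (the sketch below is not taken verbatim from the patches),
the header and scatter/gather element that the series adds to
include/linux/vmw_vmci_defs.h behave roughly as follows. The real types are
struct vmci_data_in_out_header and struct vmci_sg_elem, added by patch 7/8;
field order and the reserved padding here are assumptions, while the
semantics follow the usage visible in patches 7/8 and 8/8.

#include <stdint.h>

/* Rough model of the DMA datagram buffer header (assumed layout). */
struct example_data_in_out_header {
	uint32_t busy;     /* ownership/completion flag toggled by the device */
	uint32_t opcode;   /* 0: inline data (send), 1: S/G list (receive) */
	uint32_t size;     /* bytes of inline data, or number of S/G elements */
	uint32_t rsvd;
	uint64_t result;   /* send result written back by the device */
};

/* Rough model of one scatter/gather element following the header. */
struct example_sg_elem {
	uint64_t addr;     /* bus/DMA address of the data area */
	uint64_t size;     /* size of the data area in bytes */
};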

v2 (fixes issues flagged by kernel test robot ):
- changed type of mmio_base to void __iomem *
- made vmci_read_reg, vmci_write_reg and vmci_write_data static functions

v3:
- removed log messages for page size and BAR resources

Jorgen Hansen (8):
  VMCI: dma dg: whitespace formatting change for vmci register defines
  VMCI: dma dg: add MMIO access to registers
  VMCI: dma dg: detect DMA datagram capability
  VMCI: dma dg: set OS page size
  VMCI: dma dg: register dummy IRQ handlers for DMA datagrams
  VMCI: dma dg: allocate send and receive buffers for DMA datagrams
  VMCI: dma dg: add support for DMA datagrams sends
  VMCI: dma dg: add support for DMA datagrams receive

 drivers/misc/vmw_vmci/vmci_guest.c | 335 -
 include/linux/vmw_vmci_defs.h  |  84 +++-
 2 files changed, 355 insertions(+), 64 deletions(-)

-- 
2.25.1



Re: [PATCH v2 2/8] VMCI: dma dg: add MMIO access to registers

2022-02-07 Thread Jorgen Hansen


> On 4 Feb 2022, at 16.12, Greg KH  wrote:
> 
> On Thu, Feb 03, 2022 at 05:12:31AM -0800, Jorgen Hansen wrote:
>> Detect the support for MMIO access through examination of the length
>> of the region requested in BAR1. If it is 256KB, the VMCI device
>> supports MMIO access to registers.
>> 
>> If MMIO access is supported, map the area of the region used for
>> MMIO access (64KB size at offset 128KB).
>> 
>> Add wrapper functions for accessing 32 bit register accesses through
>> either MMIO or IO ports based on device configuration.
>> 
>> Sending and receiving datagrams through iowrite8_rep/ioread8_rep is
>> left unchanged for now, and will be addressed in a later change.
>> 
>> Reviewed-by: Vishnu Dasa 
>> Signed-off-by: Jorgen Hansen 
>> ---
>> drivers/misc/vmw_vmci/vmci_guest.c | 68 ++
>> include/linux/vmw_vmci_defs.h  | 12 ++
>> 2 files changed, 62 insertions(+), 18 deletions(-)
>> 
>> diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
>> b/drivers/misc/vmw_vmci/vmci_guest.c
>> index 1018dc77269d..38ee7ed32ab9 100644
>> --- a/drivers/misc/vmw_vmci/vmci_guest.c
>> +++ b/drivers/misc/vmw_vmci/vmci_guest.c
>> @@ -45,6 +45,7 @@ static u32 vm_context_id = VMCI_INVALID_ID;
>> struct vmci_guest_device {
>>  struct device *dev; /* PCI device we are attached to */
>>  void __iomem *iobase;
>> +void __iomem *mmio_base;
>> 
>>  bool exclusive_vectors;
>> 
>> @@ -89,6 +90,21 @@ u32 vmci_get_vm_context_id(void)
>>  return vm_context_id;
>> }
>> 
>> +static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
>> +{
>> +if (dev->mmio_base != NULL)
>> +return readl(dev->mmio_base + reg);
>> +return ioread32(dev->iobase + reg);
>> +}
>> +
>> +static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
>> +{
>> +if (dev->mmio_base != NULL)
>> +writel(val, dev->mmio_base + reg);
>> +else
>> +iowrite32(val, dev->iobase + reg);
>> +}
>> +
>> /*
>>  * VM to hypervisor call mechanism. We use the standard VMware naming
>>  * convention since shared code is calling this function as well.
>> @@ -116,7 +132,7 @@ int vmci_send_datagram(struct vmci_datagram *dg)
>>  if (vmci_dev_g) {
>>  iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
>>   dg, VMCI_DG_SIZE(dg));
>> -result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
>> +result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
>>  } else {
>>  result = VMCI_ERROR_UNAVAILABLE;
>>  }
>> @@ -384,7 +400,7 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
>>  unsigned int icr;
>> 
>>  /* Acknowledge interrupt and determine what needs doing. */
>> -icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
>> +icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
>>  if (icr == 0 || icr == ~0)
>>  return IRQ_NONE;
>> 
>> @@ -429,7 +445,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
>> const struct pci_device_id *id)
>> {
>>  struct vmci_guest_device *vmci_dev;
>> -void __iomem *iobase;
>> +void __iomem *iobase = NULL;
>> +void __iomem *mmio_base = NULL;
>>  unsigned int capabilities;
>>  unsigned int caps_in_use;
>>  unsigned long cmd;
>> @@ -445,16 +462,32 @@ static int vmci_guest_probe_device(struct pci_dev 
>> *pdev,
>>  return error;
>>  }
>> 
>> -error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
>> -if (error) {
>> -dev_err(>dev, "Failed to reserve/map IO regions\n");
>> -return error;
>> +/*
>> + * The VMCI device with mmio access to registers requests 256KB
>> + * for BAR1. If present, driver will use new VMCI device
>> + * functionality for register access and datagram send/recv.
>> + */
>> +
>> +if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
>> +dev_info(>dev, "MMIO register access is available\n");
>> +mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
>> +VMCI_MMIO_ACCESS_SIZE);
>> +/* If the map fails, we fall back to IOIO access. */
>>

Re: [PATCH v2 4/8] VMCI: dma dg: set OS page size

2022-02-07 Thread Jorgen Hansen


> On 4 Feb 2022, at 16.12, Greg KH  wrote:
> 
> On Thu, Feb 03, 2022 at 05:12:33AM -0800, Jorgen Hansen wrote:
>> Tell the device the page size used by the OS.
>> 
>> Reviewed-by: Vishnu Dasa 
>> Signed-off-by: Jorgen Hansen 
>> ---
>> drivers/misc/vmw_vmci/vmci_guest.c | 9 +
>> include/linux/vmw_vmci_defs.h  | 1 +
>> 2 files changed, 10 insertions(+)
>> 
>> diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
>> b/drivers/misc/vmw_vmci/vmci_guest.c
>> index 5a99d8e27873..808680dc0820 100644
>> --- a/drivers/misc/vmw_vmci/vmci_guest.c
>> +++ b/drivers/misc/vmw_vmci/vmci_guest.c
>> @@ -581,6 +581,15 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
>>  /* Let the host know which capabilities we intend to use. */
>>  vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
>> 
>> +if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
>> +uint32_t page_shift;
>> +
>> +/* Let the device know the size for pages passed down. */
>> +vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
>> +page_shift = vmci_read_reg(vmci_dev, VMCI_GUEST_PAGE_SHIFT);
>> +dev_info(>dev, "Using page shift %d\n", page_shift);
> 
> Please do not print out debugging stuff like this to the kernel log.

OK, I’ll remove it.

> When drivers are working properly, they are quiet.
> 
> thanks,
> 
> greg k-h

Thanks,
Jorgen

[PATCH v2 7/8] VMCI: dma dg: add support for DMA datagrams sends

2022-02-03 Thread Jorgen Hansen
Use a DMA-based send operation from the transmit buffer instead of
the iowrite8_rep based datagram send when DMA datagrams are supported.

The outgoing datagram is sent as inline data in the VMCI transmit
buffer. Once the header has been configured, the send is initiated
by writing the lower 32 bits of the buffer base address to the
VMCI_DATA_OUT_LOW_ADDR register. Only then will the device process
the header and the datagram itself. Following that, the driver busy
waits (it isn't possible to sleep on the send path) for the header
busy flag to change, indicating that the send is complete.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 45 --
 include/linux/vmw_vmci_defs.h  | 34 ++
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index c207ca2ca42e..ae2fd9c791d0 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -114,6 +115,47 @@ static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
iowrite32(val, dev->iobase + reg);
 }
 
+static int vmci_write_data(struct vmci_guest_device *dev,
+  struct vmci_datagram *dg)
+{
+   int result;
+
+   if (dev->mmio_base != NULL) {
+   struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
+   u8 *dg_out_buffer = (u8 *)(buffer_header + 1);
+
+   if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
+   return VMCI_ERROR_INVALID_ARGS;
+
+   /*
+* Initialize send buffer with outgoing datagram
+* and set up header for inline data. Device will
+* not access buffer asynchronously - only after
+* the write to VMCI_DATA_OUT_LOW_ADDR.
+*/
+   memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
+   buffer_header->opcode = 0;
+   buffer_header->size = VMCI_DG_SIZE(dg);
+   buffer_header->busy = 1;
+
+   vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
+  VMCI_DATA_OUT_LOW_ADDR);
+
+   /* Caller holds a spinlock, so cannot block. */
+   spin_until_cond(buffer_header->busy == 0);
+
+   result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
+   if (result == VMCI_SUCCESS)
+   result = (int)buffer_header->result;
+   } else {
+   iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
+dg, VMCI_DG_SIZE(dg));
+   result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
+   }
+
+   return result;
+}
+
 /*
  * VM to hypervisor call mechanism. We use the standard VMware naming
  * convention since shared code is calling this function as well.
@@ -139,8 +181,7 @@ int vmci_send_datagram(struct vmci_datagram *dg)
 spin_lock_irqsave(&vmci_dev_spinlock, flags);
 
if (vmci_dev_g) {
-   iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
-dg, VMCI_DG_SIZE(dg));
+   vmci_write_data(vmci_dev_g, dg);
result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
} else {
result = VMCI_ERROR_UNAVAILABLE;
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 8bc37d8244a8..6fb663b36f72 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -110,6 +110,40 @@ enum {
 #define VMCI_MMIO_ACCESS_OFFSET        ((size_t)(128 * 1024))
 #define VMCI_MMIO_ACCESS_SIZE  ((size_t)(64 * 1024))
 
+/*
+ * For VMCI devices supporting the VMCI_CAPS_DMA_DATAGRAM capability, the
+ * sending and receiving of datagrams can be performed using DMA to/from
+ * a driver allocated buffer.
+ * Sending and receiving will be handled as follows:
+ * - when sending datagrams, the driver initializes the buffer where the
+ *   data part will refer to the outgoing VMCI datagram, sets the busy flag
+ *   to 1 and writes the address of the buffer to VMCI_DATA_OUT_HIGH_ADDR
+ *   and VMCI_DATA_OUT_LOW_ADDR. Writing to VMCI_DATA_OUT_LOW_ADDR triggers
+ *   the device processing of the buffer. When the device has processed the
+ *   buffer, it will write the result value to the buffer and then clear the
+ *   busy flag.
+ * - when receiving datagrams, the driver initializes the buffer where the
+ *   data part will describe the receive buffer, clears the busy flag and
+ *   writes the address of the buffer to VMCI_DATA_IN_HIGH_ADDR and
+ *   VMCI_DATA_IN_LOW_ADDR. Writing to VMCI_DATA_IN_LOW_ADDR triggers the
+ *   device processing of the buffer. The device will copy as many available
+ *   datagrams into t

[PATCH v2 4/8] VMCI: dma dg: set OS page size

2022-02-03 Thread Jorgen Hansen
Tell the device the page size used by the OS.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 9 +
 include/linux/vmw_vmci_defs.h  | 1 +
 2 files changed, 10 insertions(+)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 5a99d8e27873..808680dc0820 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -581,6 +581,15 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
/* Let the host know which capabilities we intend to use. */
vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
 
+   if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
+   uint32_t page_shift;
+
+   /* Let the device know the size for pages passed down. */
+   vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
+   page_shift = vmci_read_reg(vmci_dev, VMCI_GUEST_PAGE_SHIFT);
+   dev_info(>dev, "Using page shift %d\n", page_shift);
+   }
+
/* Set up global device so that we can start sending datagrams */
 spin_lock_irq(&vmci_dev_spinlock);
vmci_dev_g = vmci_dev;
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 1ce2cffdc3ae..4167779469fd 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -21,6 +21,7 @@
 #define VMCI_CAPS_ADDR  0x18
 #define VMCI_RESULT_LOW_ADDR    0x1c
 #define VMCI_RESULT_HIGH_ADDR   0x20
+#define VMCI_GUEST_PAGE_SHIFT   0x34
 
 /* Max number of devices. */
 #define VMCI_MAX_DEVICES 1
-- 
2.25.1



[PATCH v2 8/8] VMCI: dma dg: add support for DMA datagrams receive

2022-02-03 Thread Jorgen Hansen
Use the DMA based receive operation instead of the ioread8_rep
based datagram receive when DMA datagrams are supported.

In the receive operation, configure the header to point to the
page-aligned VMCI_MAX_DG_SIZE part of the receive buffer using an
S/G configuration for the header. This ensures that the existing
dispatch routine can be used with little modification. Initiate the
receive by writing the lower 32 bits of the buffer address to the
VMCI_DATA_IN_LOW_ADDR register, and wait for the busy flag to be
changed by the device using a wait queue.

The existing dispatch routine for received datagrams is reused
for DMA datagrams with a few modifications:
- the receive buffer is always the maximum size for DMA datagrams
  (the IO port path first tries a shorter buffer to reduce the
  overhead of the ioread8_rep operation);
- for DMA datagrams, datagrams are laid out contiguously in the
  buffer, as opposed to IO port datagrams, which can start on any
  page boundary.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 103 ++---
 1 file changed, 79 insertions(+), 24 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index ae2fd9c791d0..67fac1b8f262 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -58,6 +58,7 @@ struct vmci_guest_device {
 
struct tasklet_struct datagram_tasklet;
struct tasklet_struct bm_tasklet;
+   struct wait_queue_head inout_wq;
 
void *data_buffer;
dma_addr_t data_buffer_base;
@@ -115,6 +116,36 @@ static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
iowrite32(val, dev->iobase + reg);
 }
 
+static void vmci_read_data(struct vmci_guest_device *vmci_dev,
+  void *dest, size_t size)
+{
+   if (vmci_dev->mmio_base == NULL)
+   ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+   dest, size);
+   else {
+   /*
+    * For DMA datagrams, the data_buffer will contain the header on the
+    * first page, followed by the incoming datagram(s) on the following
+    * pages. The header uses an S/G element immediately following the
+    * header on the first page to point to the data area.
+    */
+   struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
+   struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
+   size_t buffer_offset = dest - vmci_dev->data_buffer;
+
+   buffer_header->opcode = 1;
+   buffer_header->size = 1;
+   buffer_header->busy = 0;
+   sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
+   sg_array[0].size = size;
+
+   vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
+                  VMCI_DATA_IN_LOW_ADDR);
+
+   wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
+   }
+}
+
 static int vmci_write_data(struct vmci_guest_device *dev,
   struct vmci_datagram *dg)
 {
@@ -261,15 +292,17 @@ static int vmci_check_host_caps(struct pci_dev *pdev)
 }
 
 /*
- * Reads datagrams from the data in port and dispatches them. We
- * always start reading datagrams into only the first page of the
- * datagram buffer. If the datagrams don't fit into one page, we
- * use the maximum datagram buffer size for the remainder of the
- * invocation. This is a simple heuristic for not penalizing
- * small datagrams.
+ * Reads datagrams from the device and dispatches them. For IO port
+ * based access to the device, we always start reading datagrams into
+ * only the first page of the datagram buffer. If the datagrams don't
+ * fit into one page, we use the maximum datagram buffer size for the
+ * remainder of the invocation. This is a simple heuristic for not
+ * penalizing small datagrams. For DMA-based datagrams, we always
+ * use the maximum datagram buffer size, since there is no performance
+ * penalty for doing so.
  *
  * This function assumes that it has exclusive access to the data
- * in port for the duration of the call.
+ * in register(s) for the duration of the call.
  */
 static void vmci_dispatch_dgs(unsigned long data)
 {
@@ -277,23 +310,41 @@ static void vmci_dispatch_dgs(unsigned long data)
u8 *dg_in_buffer = vmci_dev->data_buffer;
struct vmci_datagram *dg;
size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
-   size_t current_dg_in_buffer_size = PAGE_SIZE;
+   size_t current_dg_in_buffer_size;
size_t remaining_bytes;
+   bool is_io_port = vmci_dev->mmio_base == NULL;
 
BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
 
-   ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN

[PATCH v2 3/8] VMCI: dma dg: detect DMA datagram capability

2022-02-03 Thread Jorgen Hansen
Detect the VMCI DMA datagram capability, and if present, ack it
to the device.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 11 +++
 include/linux/vmw_vmci_defs.h  |  1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 38ee7ed32ab9..5a99d8e27873 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -565,6 +565,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
}
}
 
+   if (mmio_base != NULL) {
+   if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
+   caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
+   } else {
+   dev_err(&pdev->dev,
+   "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
+   error = -ENXIO;
+   goto err_free_data_buffer;
+   }
+   }
+
dev_info(>dev, "Using capabilities 0x%x\n", caps_in_use);
 
/* Let the host know which capabilities we intend to use. */
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 8fc00e2685cf..1ce2cffdc3ae 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -39,6 +39,7 @@
 #define VMCI_CAPS_DATAGRAM  BIT(2)
 #define VMCI_CAPS_NOTIFICATIONS BIT(3)
 #define VMCI_CAPS_PPN64 BIT(4)
+#define VMCI_CAPS_DMA_DATAGRAM  BIT(5)
 
 /* Interrupt Cause register bits. */
 #define VMCI_ICR_DATAGRAM  BIT(0)
-- 
2.25.1



[PATCH v2 6/8] VMCI: dma dg: allocate send and receive buffers for DMA datagrams

2022-02-03 Thread Jorgen Hansen
If DMA datagrams are used, allocate send and receive buffers
in coherent DMA memory.

This is done in preparation for the send and receive datagram
operations, where the buffers are used for the exchange of data
between driver and device.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 66 ++
 include/linux/vmw_vmci_defs.h  |  4 ++
 2 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 0920fbc6b64f..c207ca2ca42e 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -31,6 +31,12 @@
 
 #define VMCI_UTIL_NUM_RESOURCES 1
 
+/*
+ * Datagram buffers for DMA send/receive must accommodate at least
+ * a maximum sized datagram and the header.
+ */
+#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)
+
 static bool vmci_disable_msi;
 module_param_named(disable_msi, vmci_disable_msi, bool, 0);
 MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
@@ -53,6 +59,9 @@ struct vmci_guest_device {
struct tasklet_struct bm_tasklet;
 
void *data_buffer;
+   dma_addr_t data_buffer_base;
+   void *tx_buffer;
+   dma_addr_t tx_buffer_base;
void *notification_bitmap;
dma_addr_t notification_base;
 };
@@ -451,6 +460,24 @@ static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
return IRQ_HANDLED;
 }
 
+static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
+{
+   if (vmci_dev->mmio_base != NULL) {
+   if (vmci_dev->tx_buffer != NULL)
+   dma_free_coherent(vmci_dev->dev,
+ VMCI_DMA_DG_BUFFER_SIZE,
+ vmci_dev->tx_buffer,
+ vmci_dev->tx_buffer_base);
+   if (vmci_dev->data_buffer != NULL)
+   dma_free_coherent(vmci_dev->dev,
+ VMCI_DMA_DG_BUFFER_SIZE,
+ vmci_dev->data_buffer,
+ vmci_dev->data_buffer_base);
+   } else {
+   vfree(vmci_dev->data_buffer);
+   }
+}
+
 /*
  * Most of the initialization at module load time is done here.
  */
@@ -520,11 +547,27 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 tasklet_init(&vmci_dev->bm_tasklet,
 vmci_process_bitmap, (unsigned long)vmci_dev);
 
-   vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+   if (mmio_base != NULL) {
+           vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
+                                                    &vmci_dev->tx_buffer_base,
+                                                    GFP_KERNEL);
+           if (!vmci_dev->tx_buffer) {
+                   dev_err(&pdev->dev,
+                           "Can't allocate memory for datagram tx buffer\n");
+                   return -ENOMEM;
+           }
+
+           vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
+                                                      &vmci_dev->data_buffer_base,
+                                                      GFP_KERNEL);
+   } else {
+           vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+   }
    if (!vmci_dev->data_buffer) {
            dev_err(&pdev->dev,
                    "Can't allocate memory for datagram buffer\n");
-   return -ENOMEM;
+   error = -ENOMEM;
+   goto err_free_data_buffers;
}
 
pci_set_master(pdev);   /* To enable queue_pair functionality. */
@@ -542,7 +585,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
dev_err(>dev, "Device does not support datagrams\n");
error = -ENXIO;
-   goto err_free_data_buffer;
+   goto err_free_data_buffers;
}
caps_in_use = VMCI_CAPS_DATAGRAM;
 
@@ -586,7 +629,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 dev_err(&pdev->dev,
"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
error = -ENXIO;
-   goto err_free_data_buffer;
+   goto err_free_data_buffers;
}
}
 
@@ -602,6 +645,12 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
page_shift = vmci_read_reg(vmci_dev, VMCI_GUEST_PAGE_SHIFT);
dev_info(>dev, "Using page shift %d\n", page_shift);
+
+  

[PATCH v2 5/8] VMCI: dma dg: register dummy IRQ handlers for DMA datagrams

2022-02-03 Thread Jorgen Hansen
Register dummy interrupt handlers for DMA datagrams in preparation for
DMA datagram receive operations.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 42 +++---
 include/linux/vmw_vmci_defs.h  | 14 --
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 808680dc0820..0920fbc6b64f 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -414,6 +414,9 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
icr &= ~VMCI_ICR_NOTIFICATION;
}
 
+   if (icr & VMCI_ICR_DMA_DATAGRAM)
+   icr &= ~VMCI_ICR_DMA_DATAGRAM;
+
if (icr != 0)
dev_warn(dev->dev,
 "Ignoring unknown interrupt cause (%d)\n",
@@ -438,6 +441,16 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
return IRQ_HANDLED;
 }
 
+/*
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
+ * which is for the completion of a DMA datagram send or receive operation.
+ * Will only get called if we are using MSI-X with exclusive vectors.
+ */
+static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
+{
+   return IRQ_HANDLED;
+}
+
 /*
  * Most of the initialization at module load time is done here.
  */
@@ -447,6 +460,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
struct vmci_guest_device *vmci_dev;
void __iomem *iobase = NULL;
void __iomem *mmio_base = NULL;
+   unsigned int num_irq_vectors;
unsigned int capabilities;
unsigned int caps_in_use;
unsigned long cmd;
@@ -635,8 +649,12 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 * Enable interrupts.  Try MSI-X first, then MSI, and then fallback on
 * legacy interrupts.
 */
-   error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
-   PCI_IRQ_MSIX);
+   if (vmci_dev->mmio_base != NULL)
+   num_irq_vectors = VMCI_MAX_INTRS;
+   else
+   num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
+   error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
+ PCI_IRQ_MSIX);
if (error < 0) {
error = pci_alloc_irq_vectors(pdev, 1, 1,
PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
@@ -674,6 +692,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
pci_irq_vector(pdev, 1), error);
goto err_free_irq;
}
+   if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
+   error = request_irq(pci_irq_vector(pdev, 2),
+   vmci_interrupt_dma_datagram,
+   0, KBUILD_MODNAME, vmci_dev);
+   if (error) {
+   dev_err(&pdev->dev,
+   "Failed to allocate irq %u: %d\n",
+   pci_irq_vector(pdev, 2), error);
+   goto err_free_bm_irq;
+   }
+   }
}
 
 dev_dbg(&pdev->dev, "Registered device\n");
@@ -684,6 +713,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
cmd = VMCI_IMR_DATAGRAM;
if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
cmd |= VMCI_IMR_NOTIFICATION;
+   if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
+   cmd |= VMCI_IMR_DMA_DATAGRAM;
vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);
 
/* Enable interrupts. */
@@ -694,6 +725,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_call_vsock_callback(false);
return 0;
 
+err_free_bm_irq:
+   free_irq(pci_irq_vector(pdev, 1), vmci_dev);
 err_free_irq:
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
 tasklet_kill(&vmci_dev->datagram_tasklet);
@@ -759,8 +792,11 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
 * MSI-X, we might have multiple vectors, each with their own
 * IRQ, which we must free too.
 */
-   if (vmci_dev->exclusive_vectors)
+   if (vmci_dev->exclusive_vectors) {
free_irq(pci_irq_vector(pdev, 1), vmci_dev);
+   if (vmci_dev->mmio_base != NULL)
+   free_irq(pci_irq_vector(pdev, 2), vmci_dev);
+   }
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
pci_free_irq_vectors(pdev);
 
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 4167779469fd..2b70c024dacb 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/v

[PATCH v2 1/8] VMCI: dma dg: whitespace formatting change for vmci register defines

2022-02-03 Thread Jorgen Hansen
Update formatting of existing register defines in preparation for
adding additional register definitions for the VMCI device.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 include/linux/vmw_vmci_defs.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index e36cb114c188..9911ecfc18ba 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -12,15 +12,15 @@
 #include 
 
 /* Register offsets. */
-#define VMCI_STATUS_ADDR      0x00
-#define VMCI_CONTROL_ADDR     0x04
-#define VMCI_ICR_ADDR         0x08
-#define VMCI_IMR_ADDR         0x0c
-#define VMCI_DATA_OUT_ADDR    0x10
-#define VMCI_DATA_IN_ADDR     0x14
-#define VMCI_CAPS_ADDR        0x18
-#define VMCI_RESULT_LOW_ADDR  0x1c
-#define VMCI_RESULT_HIGH_ADDR 0x20
+#define VMCI_STATUS_ADDR        0x00
+#define VMCI_CONTROL_ADDR       0x04
+#define VMCI_ICR_ADDR           0x08
+#define VMCI_IMR_ADDR           0x0c
+#define VMCI_DATA_OUT_ADDR      0x10
+#define VMCI_DATA_IN_ADDR       0x14
+#define VMCI_CAPS_ADDR          0x18
+#define VMCI_RESULT_LOW_ADDR    0x1c
+#define VMCI_RESULT_HIGH_ADDR   0x20
 
 /* Max number of devices. */
 #define VMCI_MAX_DEVICES 1
-- 
2.25.1



[PATCH v2 2/8] VMCI: dma dg: add MMIO access to registers

2022-02-03 Thread Jorgen Hansen
Detect the support for MMIO access through examination of the length
of the region requested in BAR1. If it is 256KB, the VMCI device
supports MMIO access to registers.

If MMIO access is supported, map the area of the region used for
MMIO access (64KB size at offset 128KB).

Add wrapper functions for 32 bit register accesses through
either MMIO or IO ports, based on device configuration.

Sending and receiving datagrams through iowrite8_rep/ioread8_rep is
left unchanged for now, and will be addressed in a later change.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 68 ++
 include/linux/vmw_vmci_defs.h  | 12 ++
 2 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 1018dc77269d..38ee7ed32ab9 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -45,6 +45,7 @@ static u32 vm_context_id = VMCI_INVALID_ID;
 struct vmci_guest_device {
struct device *dev; /* PCI device we are attached to */
void __iomem *iobase;
+   void __iomem *mmio_base;
 
bool exclusive_vectors;
 
@@ -89,6 +90,21 @@ u32 vmci_get_vm_context_id(void)
return vm_context_id;
 }
 
+static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
+{
+   if (dev->mmio_base != NULL)
+   return readl(dev->mmio_base + reg);
+   return ioread32(dev->iobase + reg);
+}
+
+static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
+{
+   if (dev->mmio_base != NULL)
+   writel(val, dev->mmio_base + reg);
+   else
+   iowrite32(val, dev->iobase + reg);
+}
+
 /*
  * VM to hypervisor call mechanism. We use the standard VMware naming
  * convention since shared code is calling this function as well.
@@ -116,7 +132,7 @@ int vmci_send_datagram(struct vmci_datagram *dg)
if (vmci_dev_g) {
iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
 dg, VMCI_DG_SIZE(dg));
-   result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
+   result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
} else {
result = VMCI_ERROR_UNAVAILABLE;
}
@@ -384,7 +400,7 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
unsigned int icr;
 
/* Acknowledge interrupt and determine what needs doing. */
-   icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
+   icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
if (icr == 0 || icr == ~0)
return IRQ_NONE;
 
@@ -429,7 +445,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
   const struct pci_device_id *id)
 {
struct vmci_guest_device *vmci_dev;
-   void __iomem *iobase;
+   void __iomem *iobase = NULL;
+   void __iomem *mmio_base = NULL;
unsigned int capabilities;
unsigned int caps_in_use;
unsigned long cmd;
@@ -445,16 +462,32 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
return error;
}
 
-   error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
-   if (error) {
-   dev_err(>dev, "Failed to reserve/map IO regions\n");
-   return error;
+   /*
+* The VMCI device with mmio access to registers requests 256KB
+* for BAR1. If present, driver will use new VMCI device
+* functionality for register access and datagram send/recv.
+*/
+
+   if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
+   dev_info(>dev, "MMIO register access is available\n");
+   mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
+   VMCI_MMIO_ACCESS_SIZE);
+   /* If the map fails, we fall back to IOIO access. */
+   if (!mmio_base)
+   dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
}
 
-   iobase = pcim_iomap_table(pdev)[0];
+   if (!mmio_base) {
+   error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
+   if (error) {
+   dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
+   return error;
+   }
+   iobase = pcim_iomap_table(pdev)[0];
+   }
 
-   dev_info(>dev, "Found VMCI PCI device at %#lx, irq %u\n",
-(unsigned long)iobase, pdev->irq);
+   dev_info(>dev, "Found VMCI PCI device at %#lx, %#lx, irq %u\n",
+(unsigned long)iobase, (unsigned long)mmio_base, pdev->irq);
 
vmci_dev = devm_kzalloc(>dev, sizeof(*vmci_dev), GFP_KERNEL)

[PATCH v2 0/8] VMCI: dma dg: Add support for DMA datagrams

2022-02-03 Thread Jorgen Hansen
A new version of the VMCI device will introduce two new major changes:
- support MMIO access to device registers
- support send/receive of datagrams using DMA transfers instead of
  ioread8_rep/iowrite8_rep operations
This patch series updates the VMCI driver to support these new
features while maintaining backwards compatibility.

The DMA based datagram operations use a send and a receive buffer
allocated at module load time. The buffer contains a header
describing the layout of the buffer followed by either an SG list or
inline data. The header also contains a flag indicating whether the
buffer is currently owned by the driver or the device. Both for send
and receive, the driver will initialize the buffer, transfer ownership
to the device by writing the buffer address to a register, and then
wait for the ownership to be transferred back. The device will
generate an interrupt when this happens.
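
As a condensed sketch of that handoff on the send side (the header layout
shown here is illustrative; the authoritative definitions are the ones added
to include/linux/vmw_vmci_defs.h by the patches in this series):

struct vmci_data_in_out_header {
	u32 busy;	/* ownership flag: driver sets it, device clears it when done */
	u32 opcode;	/* 0: inline data follows the header, 1: S/G list follows */
	u32 size;	/* bytes of inline data, or number of S/G elements */
	u32 rsvd;
	u64 result;	/* result code written back by the device */
};

/* Rough outline of a DMA datagram send, mirroring patch 7/8. */
static void vmci_dma_dg_send_sketch(struct vmci_guest_device *dev,
				    struct vmci_datagram *dg)
{
	struct vmci_data_in_out_header *hdr = dev->tx_buffer;

	memcpy(hdr + 1, dg, VMCI_DG_SIZE(dg));	/* datagram is inline */
	hdr->opcode = 0;
	hdr->size = VMCI_DG_SIZE(dg);
	hdr->busy = 1;				/* hand the buffer to the device */

	/* Writing the low 32 bits of the buffer address starts the transfer. */
	vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
		       VMCI_DATA_OUT_LOW_ADDR);

	/* Ownership returns to the driver when the device clears busy. */
	spin_until_cond(hdr->busy == 0);
}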

v2 (fixes issues flagged by kernel test robot ):
- changed type of mmio_base to void __iomem *
- made vmci_read_reg, vmci_write_reg and vmci_write_data static functions

Jorgen Hansen (8):
  VMCI: dma dg: whitespace formatting change for vmci register defines
  VMCI: dma dg: add MMIO access to registers
  VMCI: dma dg: detect DMA datagram capability
  VMCI: dma dg: set OS page size
  VMCI: dma dg: register dummy IRQ handlers for DMA datagrams
  VMCI: dma dg: allocate send and receive buffers for DMA datagrams
  VMCI: dma dg: add support for DMA datagrams sends
  VMCI: dma dg: add support for DMA datagrams receive

 drivers/misc/vmw_vmci/vmci_guest.c | 340 -
 include/linux/vmw_vmci_defs.h  |  84 ++-
 2 files changed, 361 insertions(+), 63 deletions(-)

-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 8/8] VMCI: dma dg: add support for DMA datagrams receive

2022-02-02 Thread Jorgen Hansen
Use the DMA based receive operation instead of the ioread8_rep
based datagram receive when DMA datagrams are supported.

In the receive operation, configure the header to point to the
page aligned VMCI_MAX_DG_SIZE part of the receive buffer
using s/g configuration for the header. This ensures that the
existing dispatch routine can be used with little modification.
Initiate the receive by writing the lower 32 bit of the buffer
to the VMCI_DATA_IN_LOW_ADDR register, and wait for the busy
flag to be changed by the device using a wait queue.

The existing dispatch routine for received datagrams is reused
for the DMA datagrams with a few modifications:
- the receive buffer is always the maximum size for DMA datagrams
  (IO ports would try with a shorter buffer first to reduce
  overhead of the ioread8_rep operation).
- for DMA datagrams, datagrams are laid out contiguously in the
  buffer, as opposed to IO port datagrams, which can start on any
  page boundary.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 103 ++---
 1 file changed, 79 insertions(+), 24 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
b/drivers/misc/vmw_vmci/vmci_guest.c
index ca73a1913404..2bcfa292772d 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -58,6 +58,7 @@ struct vmci_guest_device {
 
struct tasklet_struct datagram_tasklet;
struct tasklet_struct bm_tasklet;
+   struct wait_queue_head inout_wq;
 
void *data_buffer;
dma_addr_t data_buffer_base;
@@ -115,6 +116,36 @@ void vmci_write_reg(struct vmci_guest_device *dev, u32 
val, u32 reg)
iowrite32(val, dev->iobase + reg);
 }
 
+static void vmci_read_data(struct vmci_guest_device *vmci_dev,
+  void *dest, size_t size)
+{
+   if (vmci_dev->mmio_base == NULL)
+   ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+   dest, size);
+   else {
+   /*
+    * For DMA datagrams, the data_buffer will contain the header on the
+    * first page, followed by the incoming datagram(s) on the following
+    * pages. The header uses an S/G element immediately following the
+    * header on the first page to point to the data area.
+    */
+   struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
+   struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
+   size_t buffer_offset = dest - vmci_dev->data_buffer;
+
+   buffer_header->opcode = 1;
+   buffer_header->size = 1;
+   buffer_header->busy = 0;
+   sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
+   sg_array[0].size = size;
+
+   vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
+  VMCI_DATA_IN_LOW_ADDR);
+
+   wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
+   }
+}
+
 int vmci_write_data(struct vmci_guest_device *dev, struct vmci_datagram *dg)
 {
int result;
@@ -260,15 +291,17 @@ static int vmci_check_host_caps(struct pci_dev *pdev)
 }
 
 /*
- * Reads datagrams from the data in port and dispatches them. We
- * always start reading datagrams into only the first page of the
- * datagram buffer. If the datagrams don't fit into one page, we
- * use the maximum datagram buffer size for the remainder of the
- * invocation. This is a simple heuristic for not penalizing
- * small datagrams.
+ * Reads datagrams from the device and dispatches them. For IO port
+ * based access to the device, we always start reading datagrams into
+ * only the first page of the datagram buffer. If the datagrams don't
+ * fit into one page, we use the maximum datagram buffer size for the
+ * remainder of the invocation. This is a simple heuristic for not
+ * penalizing small datagrams. For DMA-based datagrams, we always
+ * use the maximum datagram buffer size, since there is no performance
+ * penalty for doing so.
  *
  * This function assumes that it has exclusive access to the data
- * in port for the duration of the call.
+ * in register(s) for the duration of the call.
  */
 static void vmci_dispatch_dgs(unsigned long data)
 {
@@ -276,23 +309,41 @@ static void vmci_dispatch_dgs(unsigned long data)
u8 *dg_in_buffer = vmci_dev->data_buffer;
struct vmci_datagram *dg;
size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
-   size_t current_dg_in_buffer_size = PAGE_SIZE;
+   size_t current_dg_in_buffer_size;
size_t remaining_bytes;
+   bool is_io_port = vmci_dev->mmio_base == NULL;
 
BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
 
-   ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
-   

[PATCH 6/8] VMCI: dma dg: allocate send and receive buffers for DMA datagrams

2022-02-02 Thread Jorgen Hansen
If DMA datagrams are used, allocate send and receive buffers
in coherent DMA memory.

This is done in preparation for the send and receive datagram
operations, where the buffers are used for the exchange of data
between driver and device.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 66 ++
 include/linux/vmw_vmci_defs.h  |  4 ++
 2 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
b/drivers/misc/vmw_vmci/vmci_guest.c
index 1903fe8e7c68..d0acb686b464 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -31,6 +31,12 @@
 
 #define VMCI_UTIL_NUM_RESOURCES 1
 
+/*
+ * Datagram buffers for DMA send/receive must accommodate at least
+ * a maximum sized datagram and the header.
+ */
+#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)
+
 static bool vmci_disable_msi;
 module_param_named(disable_msi, vmci_disable_msi, bool, 0);
 MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
@@ -53,6 +59,9 @@ struct vmci_guest_device {
struct tasklet_struct bm_tasklet;
 
void *data_buffer;
+   dma_addr_t data_buffer_base;
+   void *tx_buffer;
+   dma_addr_t tx_buffer_base;
void *notification_bitmap;
dma_addr_t notification_base;
 };
@@ -451,6 +460,24 @@ static irqreturn_t vmci_interrupt_dma_datagram(int irq, 
void *_dev)
return IRQ_HANDLED;
 }
 
+static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
+{
+   if (vmci_dev->mmio_base != NULL) {
+   if (vmci_dev->tx_buffer != NULL)
+   dma_free_coherent(vmci_dev->dev,
+ VMCI_DMA_DG_BUFFER_SIZE,
+ vmci_dev->tx_buffer,
+ vmci_dev->tx_buffer_base);
+   if (vmci_dev->data_buffer != NULL)
+   dma_free_coherent(vmci_dev->dev,
+ VMCI_DMA_DG_BUFFER_SIZE,
+ vmci_dev->data_buffer,
+ vmci_dev->data_buffer_base);
+   } else {
+   vfree(vmci_dev->data_buffer);
+   }
+}
+
 /*
  * Most of the initialization at module load time is done here.
  */
@@ -520,11 +547,27 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
tasklet_init(_dev->bm_tasklet,
 vmci_process_bitmap, (unsigned long)vmci_dev);
 
-   vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+   if (mmio_base != NULL) {
+   vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
+    &vmci_dev->tx_buffer_base,
+    GFP_KERNEL);
+   if (!vmci_dev->tx_buffer) {
+   dev_err(&pdev->dev,
+   "Can't allocate memory for datagram tx buffer\n");
+   return -ENOMEM;
+   }
+
+   vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
+  &vmci_dev->data_buffer_base,
+  GFP_KERNEL);
+   } else {
+   vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+   }
if (!vmci_dev->data_buffer) {
dev_err(>dev,
"Can't allocate memory for datagram buffer\n");
-   return -ENOMEM;
+   error = -ENOMEM;
+   goto err_free_data_buffers;
}
 
pci_set_master(pdev);   /* To enable queue_pair functionality. */
@@ -542,7 +585,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
dev_err(>dev, "Device does not support datagrams\n");
error = -ENXIO;
-   goto err_free_data_buffer;
+   goto err_free_data_buffers;
}
caps_in_use = VMCI_CAPS_DATAGRAM;
 
@@ -586,7 +629,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
dev_err(>dev,
"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
error = -ENXIO;
-   goto err_free_data_buffer;
+   goto err_free_data_buffers;
}
}
 
@@ -602,6 +645,12 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
page_shift = vmci_read_reg(vmci_dev, VMCI_GUEST_PAGE_SHIFT);
dev_info(>dev, "Using page shift %d\n", page_shift);
+
+  

[PATCH 7/8] VMCI: dma dg: add support for DMA datagrams sends

2022-02-02 Thread Jorgen Hansen
Use DMA based send operation from the transmit buffer instead of the
iowrite8_rep based datagram send when DMA datagrams are supported.

The outgoing datagram is sent as inline data in the VMCI transmit
buffer. Once the header has been configured, the send is initiated
by writing the lower 32 bit of the buffer base address to the
VMCI_DATA_OUT_LOW_ADDR register. Only then will the device process
the header and the datagram itself. Following that, the driver busy
waits (it isn't possible to sleep on the send path) for the header
busy flag to change - indicating that the send is complete.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 44 --
 include/linux/vmw_vmci_defs.h  | 34 +++
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
b/drivers/misc/vmw_vmci/vmci_guest.c
index d0acb686b464..ca73a1913404 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -114,6 +115,46 @@ void vmci_write_reg(struct vmci_guest_device *dev, u32 
val, u32 reg)
iowrite32(val, dev->iobase + reg);
 }
 
+int vmci_write_data(struct vmci_guest_device *dev, struct vmci_datagram *dg)
+{
+   int result;
+
+   if (dev->mmio_base != NULL) {
+   struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
+   u8 *dg_out_buffer = (u8 *)(buffer_header + 1);
+
+   if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
+   return VMCI_ERROR_INVALID_ARGS;
+
+   /*
+* Initialize send buffer with outgoing datagram
+* and set up header for inline data. Device will
+* not access buffer asynchronously - only after
+* the write to VMCI_DATA_OUT_LOW_ADDR.
+*/
+   memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
+   buffer_header->opcode = 0;
+   buffer_header->size = VMCI_DG_SIZE(dg);
+   buffer_header->busy = 1;
+
+   vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
+  VMCI_DATA_OUT_LOW_ADDR);
+
+   /* Caller holds a spinlock, so cannot block. */
+   spin_until_cond(buffer_header->busy == 0);
+
+   result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
+   if (result == VMCI_SUCCESS)
+   result = (int)buffer_header->result;
+   } else {
+   iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
+dg, VMCI_DG_SIZE(dg));
+   result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
+   }
+
+   return result;
+}
+
 /*
  * VM to hypervisor call mechanism. We use the standard VMware naming
  * convention since shared code is calling this function as well.
@@ -139,8 +180,7 @@ int vmci_send_datagram(struct vmci_datagram *dg)
spin_lock_irqsave(_dev_spinlock, flags);
 
if (vmci_dev_g) {
-   iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
-dg, VMCI_DG_SIZE(dg));
+   vmci_write_data(vmci_dev_g, dg);
result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
} else {
result = VMCI_ERROR_UNAVAILABLE;
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 8bc37d8244a8..6fb663b36f72 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -110,6 +110,40 @@ enum {
 #define VMCI_MMIO_ACCESS_OFFSET((size_t)(128 * 1024))
 #define VMCI_MMIO_ACCESS_SIZE  ((size_t)(64 * 1024))
 
+/*
+ * For VMCI devices supporting the VMCI_CAPS_DMA_DATAGRAM capability, the
+ * sending and receiving of datagrams can be performed using DMA to/from
+ * a driver allocated buffer.
+ * Sending and receiving will be handled as follows:
+ * - when sending datagrams, the driver initializes the buffer where the
+ *   data part will refer to the outgoing VMCI datagram, sets the busy flag
+ *   to 1 and writes the address of the buffer to VMCI_DATA_OUT_HIGH_ADDR
+ *   and VMCI_DATA_OUT_LOW_ADDR. Writing to VMCI_DATA_OUT_LOW_ADDR triggers
+ *   the device processing of the buffer. When the device has processed the
+ *   buffer, it will write the result value to the buffer and then clear the
+ *   busy flag.
+ * - when receiving datagrams, the driver initializes the buffer where the
+ *   data part will describe the receive buffer, clears the busy flag and
+ *   writes the address of the buffer to VMCI_DATA_IN_HIGH_ADDR and
+ *   VMCI_DATA_IN_LOW_ADDR. Writing to VMCI_DATA_IN_LOW_ADDR triggers the
+ *   device processing of the buffer. The device will copy as many available
+ *   datagrams into the buffer as possible, and then sets the

[PATCH 5/8] VMCI: dma dg: register dummy IRQ handlers for DMA datagrams

2022-02-02 Thread Jorgen Hansen
Register dummy interrupt handlers for DMA datagrams in preparation for
DMA datagram receive operations.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 42 +++---
 include/linux/vmw_vmci_defs.h  | 14 --
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
b/drivers/misc/vmw_vmci/vmci_guest.c
index ec76661bde7e..1903fe8e7c68 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -414,6 +414,9 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
icr &= ~VMCI_ICR_NOTIFICATION;
}
 
+   if (icr & VMCI_ICR_DMA_DATAGRAM)
+   icr &= ~VMCI_ICR_DMA_DATAGRAM;
+
if (icr != 0)
dev_warn(dev->dev,
 "Ignoring unknown interrupt cause (%d)\n",
@@ -438,6 +441,16 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
return IRQ_HANDLED;
 }
 
+/*
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
+ * which is for the completion of a DMA datagram send or receive operation.
+ * Will only get called if we are using MSI-X with exclusive vectors.
+ */
+static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
+{
+   return IRQ_HANDLED;
+}
+
 /*
  * Most of the initialization at module load time is done here.
  */
@@ -447,6 +460,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
struct vmci_guest_device *vmci_dev;
void __iomem *iobase = NULL;
char *mmio_base = NULL;
+   unsigned int num_irq_vectors;
unsigned int capabilities;
unsigned int caps_in_use;
unsigned long cmd;
@@ -635,8 +649,12 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 * Enable interrupts.  Try MSI-X first, then MSI, and then fallback on
 * legacy interrupts.
 */
-   error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
-   PCI_IRQ_MSIX);
+   if (vmci_dev->mmio_base != NULL)
+   num_irq_vectors = VMCI_MAX_INTRS;
+   else
+   num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
+   error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
+ PCI_IRQ_MSIX);
if (error < 0) {
error = pci_alloc_irq_vectors(pdev, 1, 1,
PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
@@ -674,6 +692,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
pci_irq_vector(pdev, 1), error);
goto err_free_irq;
}
+   if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
+   error = request_irq(pci_irq_vector(pdev, 2),
+   vmci_interrupt_dma_datagram,
+   0, KBUILD_MODNAME, vmci_dev);
+   if (error) {
+   dev_err(>dev,
+   "Failed to allocate irq %u: %d\n",
+   pci_irq_vector(pdev, 2), error);
+   goto err_free_bm_irq;
+   }
+   }
}
 
dev_dbg(>dev, "Registered device\n");
@@ -684,6 +713,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
cmd = VMCI_IMR_DATAGRAM;
if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
cmd |= VMCI_IMR_NOTIFICATION;
+   if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
+   cmd |= VMCI_IMR_DMA_DATAGRAM;
vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);
 
/* Enable interrupts. */
@@ -694,6 +725,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
vmci_call_vsock_callback(false);
return 0;
 
+err_free_bm_irq:
+   free_irq(pci_irq_vector(pdev, 1), vmci_dev);
 err_free_irq:
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
tasklet_kill(_dev->datagram_tasklet);
@@ -759,8 +792,11 @@ static void vmci_guest_remove_device(struct pci_dev *pdev)
 * MSI-X, we might have multiple vectors, each with their own
 * IRQ, which we must free too.
 */
-   if (vmci_dev->exclusive_vectors)
+   if (vmci_dev->exclusive_vectors) {
free_irq(pci_irq_vector(pdev, 1), vmci_dev);
+   if (vmci_dev->mmio_base != NULL)
+   free_irq(pci_irq_vector(pdev, 2), vmci_dev);
+   }
free_irq(pci_irq_vector(pdev, 0), vmci_dev);
pci_free_irq_vectors(pdev);
 
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 4167779469fd..2b70c024dacb 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ 

[PATCH 3/8] VMCI: dma dg: detect DMA datagram capability

2022-02-02 Thread Jorgen Hansen
Detect the VMCI DMA datagram capability, and if present, ack it
to the device.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 11 +++
 include/linux/vmw_vmci_defs.h  |  1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
b/drivers/misc/vmw_vmci/vmci_guest.c
index d00430e5aba3..4bd5891ff910 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -565,6 +565,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
}
}
 
+   if (mmio_base != NULL) {
+   if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
+   caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
+   } else {
+   dev_err(>dev,
+   "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
+   error = -ENXIO;
+   goto err_free_data_buffer;
+   }
+   }
+
dev_info(>dev, "Using capabilities 0x%x\n", caps_in_use);
 
/* Let the host know which capabilities we intend to use. */
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 8fc00e2685cf..1ce2cffdc3ae 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -39,6 +39,7 @@
 #define VMCI_CAPS_DATAGRAM  BIT(2)
 #define VMCI_CAPS_NOTIFICATIONS BIT(3)
 #define VMCI_CAPS_PPN64 BIT(4)
+#define VMCI_CAPS_DMA_DATAGRAM  BIT(5)
 
 /* Interrupt Cause register bits. */
 #define VMCI_ICR_DATAGRAM  BIT(0)
-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 4/8] VMCI: dma dg: set OS page size

2022-02-02 Thread Jorgen Hansen
Tell the device the page size used by the OS.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 9 +
 include/linux/vmw_vmci_defs.h  | 1 +
 2 files changed, 10 insertions(+)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
b/drivers/misc/vmw_vmci/vmci_guest.c
index 4bd5891ff910..ec76661bde7e 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -581,6 +581,15 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
/* Let the host know which capabilities we intend to use. */
vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
 
+   if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
+   uint32_t page_shift;
+
+   /* Let the device know the size for pages passed down. */
+   vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
+   page_shift = vmci_read_reg(vmci_dev, VMCI_GUEST_PAGE_SHIFT);
+   dev_info(>dev, "Using page shift %d\n", page_shift);
+   }
+
/* Set up global device so that we can start sending datagrams */
spin_lock_irq(_dev_spinlock);
vmci_dev_g = vmci_dev;
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 1ce2cffdc3ae..4167779469fd 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -21,6 +21,7 @@
 #define VMCI_CAPS_ADDR  0x18
 #define VMCI_RESULT_LOW_ADDR0x1c
 #define VMCI_RESULT_HIGH_ADDR   0x20
+#define VMCI_GUEST_PAGE_SHIFT   0x34
 
 /* Max number of devices. */
 #define VMCI_MAX_DEVICES 1
-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 1/8] VMCI: dma dg: whitespace formatting change for vmci register defines

2022-02-02 Thread Jorgen Hansen
Update formatting of existing register defines in preparation for
adding additional register definitions for the VMCI device.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 include/linux/vmw_vmci_defs.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index e36cb114c188..9911ecfc18ba 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -12,15 +12,15 @@
 #include 
 
 /* Register offsets. */
-#define VMCI_STATUS_ADDR  0x00
-#define VMCI_CONTROL_ADDR 0x04
-#define VMCI_ICR_ADDR0x08
-#define VMCI_IMR_ADDR 0x0c
-#define VMCI_DATA_OUT_ADDR0x10
-#define VMCI_DATA_IN_ADDR 0x14
-#define VMCI_CAPS_ADDR0x18
-#define VMCI_RESULT_LOW_ADDR  0x1c
-#define VMCI_RESULT_HIGH_ADDR 0x20
+#define VMCI_STATUS_ADDR0x00
+#define VMCI_CONTROL_ADDR   0x04
+#define VMCI_ICR_ADDR   0x08
+#define VMCI_IMR_ADDR   0x0c
+#define VMCI_DATA_OUT_ADDR  0x10
+#define VMCI_DATA_IN_ADDR   0x14
+#define VMCI_CAPS_ADDR  0x18
+#define VMCI_RESULT_LOW_ADDR0x1c
+#define VMCI_RESULT_HIGH_ADDR   0x20
 
 /* Max number of devices. */
 #define VMCI_MAX_DEVICES 1
-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 2/8] VMCI: dma dg: add MMIO access to registers

2022-02-02 Thread Jorgen Hansen
Detect the support for MMIO access through examination of the length
of the region requested in BAR1. If it is 256KB, the VMCI device
supports MMIO access to registers.

If MMIO access is supported, map the area of the region used for
MMIO access (64KB size at offset 128KB).

Add wrapper functions for 32 bit register accesses that go through
either MMIO or IO ports, based on the device configuration.

Sending and receiving datagrams through iowrite8_rep/ioread8_rep is
left unchanged for now, and will be addressed in a later change.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_guest.c | 68 ++
 include/linux/vmw_vmci_defs.h  | 12 ++
 2 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
b/drivers/misc/vmw_vmci/vmci_guest.c
index 1018dc77269d..d00430e5aba3 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -45,6 +45,7 @@ static u32 vm_context_id = VMCI_INVALID_ID;
 struct vmci_guest_device {
struct device *dev; /* PCI device we are attached to */
void __iomem *iobase;
+   char *mmio_base;
 
bool exclusive_vectors;
 
@@ -89,6 +90,21 @@ u32 vmci_get_vm_context_id(void)
return vm_context_id;
 }
 
+unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
+{
+   if (dev->mmio_base != NULL)
+   return readl(dev->mmio_base + reg);
+   return ioread32(dev->iobase + reg);
+}
+
+void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
+{
+   if (dev->mmio_base != NULL)
+   writel(val, dev->mmio_base + reg);
+   else
+   iowrite32(val, dev->iobase + reg);
+}
+
 /*
  * VM to hypervisor call mechanism. We use the standard VMware naming
  * convention since shared code is calling this function as well.
@@ -116,7 +132,7 @@ int vmci_send_datagram(struct vmci_datagram *dg)
if (vmci_dev_g) {
iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
 dg, VMCI_DG_SIZE(dg));
-   result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
+   result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
} else {
result = VMCI_ERROR_UNAVAILABLE;
}
@@ -384,7 +400,7 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev)
unsigned int icr;
 
/* Acknowledge interrupt and determine what needs doing. */
-   icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
+   icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
if (icr == 0 || icr == ~0)
return IRQ_NONE;
 
@@ -429,7 +445,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
   const struct pci_device_id *id)
 {
struct vmci_guest_device *vmci_dev;
-   void __iomem *iobase;
+   void __iomem *iobase = NULL;
+   char *mmio_base = NULL;
unsigned int capabilities;
unsigned int caps_in_use;
unsigned long cmd;
@@ -445,16 +462,32 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
return error;
}
 
-   error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
-   if (error) {
-   dev_err(>dev, "Failed to reserve/map IO regions\n");
-   return error;
+   /*
+* The VMCI device with mmio access to registers requests 256KB
+* for BAR1. If present, driver will use new VMCI device
+* functionality for register access and datagram send/recv.
+*/
+
+   if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
+   dev_info(>dev, "MMIO register access is available\n");
+   mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
+   VMCI_MMIO_ACCESS_SIZE);
+   /* If the map fails, we fall back to IOIO access. */
+   if (!mmio_base)
+   dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
}
 
-   iobase = pcim_iomap_table(pdev)[0];
+   if (!mmio_base) {
+   error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
+   if (error) {
+   dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
+   return error;
+   }
+   iobase = pcim_iomap_table(pdev)[0];
+   }
 
-   dev_info(>dev, "Found VMCI PCI device at %#lx, irq %u\n",
-(unsigned long)iobase, pdev->irq);
+   dev_info(>dev, "Found VMCI PCI device at %#lx, %#lx, irq %u\n",
+(unsigned long)iobase, (unsigned long)mmio_base, pdev->irq);
 
vmci_dev = devm_kzalloc(>dev, sizeof(*vmci_dev), GFP_KERNEL);
if (!vmci_dev) {
@@ 

[PATCH 0/8] VMCI: dma dg: Add support for DMA datagrams

2022-02-02 Thread Jorgen Hansen
A new version of the VMCI device will introduce two new major changes:
- support MMIO access to device registers
- support send/receive of datagrams using DMA transfers instead of
  ioread8_rep/iowrite8_rep operations
This patch series updates the VMCI driver to support these new
features while maintaining backwards compatibility.

The DMA based datagram operations use a send and a receive buffer
allocated at module load time. The buffer contains a header
describing the layout of the buffer followed by either an SG list or
inline data. The header also contains a flag indicating whether the
buffer is currently owned by the driver or the device. Both for send
and receive, the driver will initialize the buffer, transfer ownership
to the device by writing the buffer address to a register, and then
wait for the ownership to be transferred back. The device will
generate an interrupt when this happens.

Jorgen Hansen (8):
  VMCI: dma dg: whitespace formatting change for vmci register defines
  VMCI: dma dg: add MMIO access to registers
  VMCI: dma dg: detect DMA datagram capability
  VMCI: dma dg: set OS page size
  VMCI: dma dg: register dummy IRQ handlers for DMA datagrams
  VMCI: dma dg: allocate send and receive buffers for DMA datagrams
  VMCI: dma dg: add support for DMA datagrams sends
  VMCI: dma dg: add support for DMA datagrams receive

 drivers/misc/vmw_vmci/vmci_guest.c | 339 -
 include/linux/vmw_vmci_defs.h  |  84 ++-
 2 files changed, 360 insertions(+), 63 deletions(-)

-- 
2.25.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] MAINTAINERS: add VM SOCKETS (AF_VSOCK) entry

2021-09-06 Thread Jorgen Hansen


> On 6 Sep 2021, at 11:11, Stefano Garzarella  wrote:
> 
> Add a new entry for VM Sockets (AF_VSOCK) that covers vsock core,
> tests, and headers. Move some general vsock stuff from virtio-vsock
> entry into this new more general vsock entry.
> 
> I've been reviewing and contributing for the last few years,
> so I'm available to help maintain this code.
> 
> Cc: Dexuan Cui 
> Cc: Jorgen Hansen 
> Cc: Stefan Hajnoczi 
> Suggested-by: Michael S. Tsirkin 
> Signed-off-by: Stefano Garzarella 
> ---
> 
> Dexuan, Jorgen, Stefan, would you like to co-maintain or
> be added as a reviewer?

Hi Stefano,

Please add me as a maintainer as well. I’ll try to help more out.

Thanks,
Jorgen

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2] MAINTAINERS: Update for VMCI driver

2021-07-21 Thread Jorgen Hansen
Add maintainer info for the VMware VMCI driver.

v2: moved pv-drivers to L: as private list

Acked-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 MAINTAINERS | 8 
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index a61f4f3..969a67a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19792,6 +19792,14 @@ L: net...@vger.kernel.org
 S: Supported
 F: drivers/ptp/ptp_vmw.c
 
+VMWARE VMCI DRIVER
+M: Jorgen Hansen 
+M: Vishnu Dasa 
+L: linux-ker...@vger.kernel.org
+L: pv-driv...@vmware.com (private)
+S: Maintained
+F: drivers/misc/vmw_vmci/
+
 VMWARE VMMOUSE SUBDRIVER
 M: "VMware Graphics" 
 M: "VMware, Inc." 
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] MAINTAINERS: Update for VMCI driver

2021-07-21 Thread Jorgen Hansen


> On 21 Jul 2021, at 11:00, Greg KH  wrote:
> 
> On Wed, Jul 21, 2021 at 08:46:15AM +0000, Jorgen Hansen wrote:
>> 
>> 
>>> On 20 Jul 2021, at 12:39, Greg KH  wrote:
>>> 
>>> On Tue, Jul 20, 2021 at 03:29:01AM -0700, Jorgen Hansen wrote:
>>>> Add maintainer info for the VMware VMCI driver.
>>>> 
>>>> Signed-off-by: Jorgen Hansen 
>>>> ---
>>>> MAINTAINERS | 8 
>>>> 1 file changed, 8 insertions(+)
>>>> 
>>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>>> index a61f4f3..7e7c6fa 100644
>>>> --- a/MAINTAINERS
>>>> +++ b/MAINTAINERS
>>>> @@ -19792,6 +19792,14 @@ L:net...@vger.kernel.org
>>>> S: Supported
>>>> F: drivers/ptp/ptp_vmw.c
>>>> 
>>>> +VMWARE VMCI DRIVER
>>>> +M:Jorgen Hansen 
>>>> +M:Vishnu Dasa 
>>>> +M:"VMware, Inc." 
>>> 
>>> Please do not use generic aliases as they provide no personal
>>> responsibility.  Just stick with real people.
>> 
>> That makes sense. However, the pv-drivers list is used for keeping managers
>> and people helping with testing in the loop. So would adding pv-drivers as a
>> second L: entry be OK?
> 
> Is it really a list?  If not, then that would not make much sense.

It is - with VMware subscribers only, but anyone can post to it. If the intent
of the L: entries is to allow folks to subscribe to relevant information, then
it isn’t appropriate.

All existing vmw driver maintainer entries have pv-drivers as an M: entry,
so has there been a change in policy regarding this? The approach has
been quite useful for us.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] MAINTAINERS: Update for VMCI driver

2021-07-21 Thread Jorgen Hansen



> On 20 Jul 2021, at 12:39, Greg KH  wrote:
> 
> On Tue, Jul 20, 2021 at 03:29:01AM -0700, Jorgen Hansen wrote:
>> Add maintainer info for the VMware VMCI driver.
>> 
>> Signed-off-by: Jorgen Hansen 
>> ---
>> MAINTAINERS | 8 
>> 1 file changed, 8 insertions(+)
>> 
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index a61f4f3..7e7c6fa 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -19792,6 +19792,14 @@ L:  net...@vger.kernel.org
>> S:   Supported
>> F:   drivers/ptp/ptp_vmw.c
>> 
>> +VMWARE VMCI DRIVER
>> +M:  Jorgen Hansen 
>> +M:  Vishnu Dasa 
>> +M:  "VMware, Inc." 
> 
> Please do not use generic aliases as they provide no personal
> responsibility.  Just stick with real people.

That makes sense. However, the pv-drivers list is used for keeping managers
and people helping with testing in the loop. So would adding pv-drivers as a
second L: entry be OK?

Thanks,
Jorgen
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] MAINTAINERS: Update for VMCI driver

2021-07-20 Thread Jorgen Hansen
Add maintainer info for the VMware VMCI driver.

Signed-off-by: Jorgen Hansen 
---
 MAINTAINERS | 8 
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index a61f4f3..7e7c6fa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19792,6 +19792,14 @@ L: net...@vger.kernel.org
 S: Supported
 F: drivers/ptp/ptp_vmw.c
 
+VMWARE VMCI DRIVER
+M: Jorgen Hansen 
+M: Vishnu Dasa 
+M: "VMware, Inc." 
+L: linux-ker...@vger.kernel.org
+S: Maintained
+F: drivers/misc/vmw_vmci/
+
 VMWARE VMMOUSE SUBDRIVER
 M: "VMware Graphics" 
 M: "VMware, Inc." 
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC] vsock: add multiple transports support for dgram

2021-04-13 Thread Jorgen Hansen


On 12 Apr 2021, at 20:53, Jiang Wang wrote:

On Mon, Apr 12, 2021 at 7:04 AM Stefano Garzarella wrote:

Hi Jiang,
thanks for re-starting the multi-transport support for dgram!

No problem.

On Wed, Apr 07, 2021 at 11:25:36AM -0700, Jiang Wang wrote:
On Wed, Apr 7, 2021 at 2:51 AM Jorgen Hansen wrote:


On 6 Apr 2021, at 20:31, Jiang Wang wrote:

From: "jiang.wang"

Currently, only VMCI supports dgram sockets. To support the
nested VM use case, this patch removes transport_dgram and
uses transport_g2h and transport_h2g for dgram too.

I agree on this part, I think that's the direction to go.
transport_dgram was added as a shortcut.

Got it.


Could you provide some background for introducing this change - are you
looking at introducing datagrams for a different transport? VMCI datagrams
already support the nested use case,

Yes, I am trying to introduce datagram for virtio transport. I wrote a
spec patch for
virtio dgram support and also a code patch, but the code patch is still WIP.
When I wrote this commit message, I was thinking nested VM is the same as
multiple transport support. But now, I realize they are different.
Nested VMs may use
the same virtualization layer(KVM on KVM), or different virtualization layers
(KVM on ESXi). Thanks for letting me know that VMCI already supported nested
use cases. I think you mean VMCI on VMCI, right?

but if we need to support multiple datagram
transports we need to rework how we administer port assignment for datagrams.
One specific issue is that the vmci transport won’t receive any datagrams for a
port unless the datagram socket has already been assigned the vmci transport
and the port bound to the underlying VMCI device (see below for more details).

I see.

The transport is assigned when sending every packet and
receiving every packet on dgram sockets.

Is the intent that the same datagram socket can be used for sending packets both
in the host-to-guest and the guest-to-host direction?

Nope. One datagram socket will only send packets to one direction, either to the
host or to the guest. My above description is wrong. When sending packets, the
transport is assigned with the first packet (with auto_bind).

I'm not sure this is right.
The auto_bind on the first packet should only assign a local port to the
socket, but does not affect the transport to be used.

A user could send one packet to the nested guest and another to the host
using the same socket, or am I wrong?

OK. I think you are right.


The problem is when receiving packets. The listener can bind to the
VMADDR_CID_ANY
address. Then it is unclear which transport we should use. For stream
sockets, there will be a new socket for each connection, and transport
can be decided
at that time. For datagram sockets, I am not sure how to handle that.

yes, this I think is the main problem, but maybe the sender one is even
more complicated.

Maybe we should remove the 1:1 association we have now between vsk and
transport.

Yes, I thought about that too. One idea is to define two transports in vsk.
For sending pkt, we can pick the right transport when we get the packet, like
in virtio_transport_send_pkt_info(). For receiving pkts, we have to check
and call both
transports dequeue callbacks if the local cid is CID_ANY.

At least for DGRAM, for connected sockets I think the association makes
sense.

Yeah. For a connected socket, we will only use one transport.

For VMCI, does the same transport can be used for both receiving from
host and from
the guest?

Yes, they're registered at different times, but it's the same transport.


For virtio, the h2g and g2h transports are different,, so we have to
choose
one of them. My original thought is to wait until the first packet
arrives.

Another idea is that we always bind to host addr and use h2g
transport because I think that might
be more common. If a listener wants to recv packets from the host, then
it
should bind to the guest addr instead of CID_ANY.

Yes, I remember we discussed this idea, this would simplify the
receiving, but there is still the issue of a user wanting to receive
packets from both the nested guest and the host.

OK. Agree.

Any other suggestions?


I think one solution could be to remove the 1:1 association between
DGRAM socket and transport.

IIUC VMCI creates a skb for each received packet and queues it through
sk_receive_skb() directly in the struct sock.

Then the .dgram_dequeue() callback dequeues them using
skb_recv_datagram().

We can move these parts in the vsock core, and create some helpers to
allow the transports to enqueue received DGRAM packets in the same way
(and with the same format) directly in the struct sock.


I agree to use skbs (and move them to vsock core). We have another use case
which will need

Re: [External] [RFC] vsock: add multiple transports support for dgram

2021-04-13 Thread Jorgen Hansen


> On 7 Apr 2021, at 20:25, Jiang Wang .  wrote:
> 
> On Wed, Apr 7, 2021 at 2:51 AM Jorgen Hansen  wrote:
>> 
>> 
>>> On 6 Apr 2021, at 20:31, Jiang Wang  wrote:
>>> 
>>> From: "jiang.wang" 
>>> 
>>> Currently, only VMCI supports dgram sockets. To support the
>>> nested VM use case, this patch removes transport_dgram and
>>> uses transport_g2h and transport_h2g for dgram too.
>> 
>> Could you provide some background for introducing this change - are you
>> looking at introducing datagrams for a different transport? VMCI datagrams
>> already support the nested use case,
> 
> Yes, I am trying to introduce datagram for virtio transport. I wrote a
> spec patch for
> virtio dgram support and also a code patch, but the code patch is still WIP.

Oh ok. Cool. I must have missed the spec patch - could you provide a reference
to it?

> When I wrote this commit message, I was thinking nested VM is the same as
> multiple transport support. But now, I realize they are different.
> Nested VMs may use
> the same virtualization layer(KVM on KVM), or different virtualization layers
> (KVM on ESXi). Thanks for letting me know that VMCI already supported nested
> use cases. I think you mean VMCI on VMCI, right?

Right, only VMCI on VMCI. 

I’ll respond to Stefano’s email for the rest of the discussion.

Thanks,
Jorgen
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [RFC] vsock: add multiple transports support for dgram

2021-04-07 Thread Jorgen Hansen

> On 6 Apr 2021, at 20:31, Jiang Wang  wrote:
> 
> From: "jiang.wang" 
> 
> Currently, only VMCI supports dgram sockets. To support the
> nested VM use case, this patch removes transport_dgram and
> uses transport_g2h and transport_h2g for dgram too.

Could you provide some background for introducing this change - are you
looking at introducing datagrams for a different transport? VMCI datagrams
already support the nested use case, but if we need to support multiple datagram
transports we need to rework how we administer port assignment for datagrams.
One specific issue is that the vmci transport won’t receive any datagrams for a
port unless the datagram socket has already been assigned the vmci transport
and the port bound to the underlying VMCI device (see below for more details).


> The transport is assigned when sending every packet and
> receiving every packet on dgram sockets.

Is the intent that the same datagram socket can be used for sending packets both
in the host-to-guest and the guest-to-host direction?
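
Purely to illustrate that scenario (a userspace sketch with made-up CIDs and
ports, not something taken from the patch):

#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
	int fd = socket(AF_VSOCK, SOCK_DGRAM, 0);
	struct sockaddr_vm host = { .svm_family = AF_VSOCK,
				    .svm_cid = VMADDR_CID_HOST,
				    .svm_port = 1234 };
	struct sockaddr_vm nested_guest = { .svm_family = AF_VSOCK,
					    .svm_cid = 52,	/* made up */
					    .svm_port = 1234 };
	const char msg[] = "ping";

	if (fd < 0)
		return 1;

	/* One socket, two destinations: the host and a (nested) guest. */
	sendto(fd, msg, sizeof(msg), 0, (struct sockaddr *)&host, sizeof(host));
	sendto(fd, msg, sizeof(msg), 0, (struct sockaddr *)&nested_guest,
	       sizeof(nested_guest));
	return 0;
}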

Also, as mentioned above the vSocket datagram needs to be bound to a port in the
VMCI transport before we can receive any datagrams on that port. This means that
vmci_transport_recv_dgram_cb won’t be called unless it is already associated
with a socket as the transport, and therefore we can’t delay the transport
assignment to that point.


> Signed-off-by: Jiang Wang 
> ---
> This patch is not tested. I don't have a VMware testing
> environment. Could someone help me to test it? 
> 
> include/net/af_vsock.h |  2 --
> net/vmw_vsock/af_vsock.c   | 63 +-
> net/vmw_vsock/vmci_transport.c | 20 +-
> 3 files changed, 45 insertions(+), 40 deletions(-)
> 
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> index b1c717286993..aba241e0d202 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -96,8 +96,6 @@ struct vsock_transport_send_notify_data {
> #define VSOCK_TRANSPORT_F_H2G 0x0001
> /* Transport provides guest->host communication */
> #define VSOCK_TRANSPORT_F_G2H 0x0002
> -/* Transport provides DGRAM communication */
> -#define VSOCK_TRANSPORT_F_DGRAM  0x0004
> /* Transport provides local (loopback) communication */
> #define VSOCK_TRANSPORT_F_LOCAL   0x0008
> 
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index 92a72f0e0d94..7739ab2521a1 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -449,8 +449,6 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct 
> vsock_sock *psk)
> 
>   switch (sk->sk_type) {
>   case SOCK_DGRAM:
> - new_transport = transport_dgram;
> - break;
>   case SOCK_STREAM:
>   if (vsock_use_local_transport(remote_cid))
>   new_transport = transport_local;
> @@ -1096,7 +1094,6 @@ static int vsock_dgram_sendmsg(struct socket *sock, 
> struct msghdr *msg,
>   struct sock *sk;
>   struct vsock_sock *vsk;
>   struct sockaddr_vm *remote_addr;
> - const struct vsock_transport *transport;
> 
>   if (msg->msg_flags & MSG_OOB)
>   return -EOPNOTSUPP;
> @@ -1108,25 +1105,30 @@ static int vsock_dgram_sendmsg(struct socket *sock, 
> struct msghdr *msg,
> 
>   lock_sock(sk);
> 
> - transport = vsk->transport;
> -
>   err = vsock_auto_bind(vsk);
>   if (err)
>   goto out;
> 
> -
>   /* If the provided message contains an address, use that.  Otherwise
>* fall back on the socket's remote handle (if it has been connected).
>*/
>   if (msg->msg_name &&
>   vsock_addr_cast(msg->msg_name, msg->msg_namelen,
>   _addr) == 0) {
> + vsock_addr_init(>remote_addr, remote_addr->svm_cid,
> + remote_addr->svm_port);
> +
> + err = vsock_assign_transport(vsk, NULL);
> + if (err) {
> + err = -EINVAL;
> + goto out;
> + }
> +
>   /* Ensure this address is of the right type and is a valid
>* destination.
>*/
> -
>   if (remote_addr->svm_cid == VMADDR_CID_ANY)
> - remote_addr->svm_cid = transport->get_local_cid();
> + remote_addr->svm_cid = vsk->transport->get_local_cid();
> 
>   if (!vsock_addr_bound(remote_addr)) {
>   err = -EINVAL;
> @@ -1136,7 +1138,7 @@ static int vsock_dgram_sendmsg(struct socket *sock, 
> struct msghdr *msg,
>   remote_addr = >remote_addr;
> 
>   if (remote_addr->svm_cid == VMADDR_CID_ANY)
> - remote_addr->svm_cid = transport->get_local_cid();
> + remote_addr->svm_cid = vsk->transport->get_local_cid();
> 
>   /* XXX Should connect() or this function ensure remote_addr is
>* 

Re: [RFC PATCH v5 02/19] af_vsock: separate wait data loop

2021-02-25 Thread Jorgen Hansen


> On 18 Feb 2021, at 06:36, Arseny Krasnov  wrote:
> 
> This moves wait loop for data to dedicated function, because later
> it will be used by SEQPACKET data receive loop.
> 
> Signed-off-by: Arseny Krasnov 
> ---
> net/vmw_vsock/af_vsock.c | 155 +--
> 1 file changed, 83 insertions(+), 72 deletions(-)
> 
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index 656370e11707..6cf7bb977aa1 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -1832,6 +1832,68 @@ static int vsock_connectible_sendmsg(struct socket 
> *sock, struct msghdr *msg,
>   return err;
> }
> 
> +static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
> +long timeout,
> +struct vsock_transport_recv_notify_data *recv_data,
> +size_t target)
> +{
> + const struct vsock_transport *transport;
> + struct vsock_sock *vsk;
> + s64 data;
> + int err;
> +
> + vsk = vsock_sk(sk);
> + err = 0;
> + transport = vsk->transport;
> + prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
> +
> + while ((data = vsock_stream_has_data(vsk)) == 0) {

In the original code, prepare_to_wait() is called for each iteration of the
while loop. In this version, it is only called once. So if we do multiple
iterations, the thread would be in the TASK_RUNNING state, and subsequent
schedule_timeout() calls will return immediately. So it looks like the
prepare_to_wait() should be moved here, in case we have a spurious wake_up.
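
Something along these lines (just a sketch, mirroring the code in this patch
but with the wait entry re-armed inside the loop):

static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
			   long timeout,
			   struct vsock_transport_recv_notify_data *recv_data,
			   size_t target)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	const struct vsock_transport *transport = vsk->transport;
	s64 data;
	int err = 0;

	while (1) {
		/* Re-arm the wait entry before every check/sleep. */
		prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);

		data = vsock_stream_has_data(vsk);
		if (data != 0)
			break;

		if (sk->sk_err != 0 ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    (vsk->peer_shutdown & SEND_SHUTDOWN))
			break;

		/* Don't wait for non-blocking sockets. */
		if (timeout == 0) {
			err = -EAGAIN;
			break;
		}

		if (recv_data) {
			err = transport->notify_recv_pre_block(vsk, target, recv_data);
			if (err < 0)
				break;
		}

		release_sock(sk);
		timeout = schedule_timeout(timeout);
		lock_sock(sk);

		if (signal_pending(current)) {
			err = sock_intr_errno(timeout);
			break;
		} else if (timeout == 0) {
			err = -EAGAIN;
			break;
		}
	}

	finish_wait(sk_sleep(sk), wait);

	if (err)
		return err;

	/* Internal transport error when checking for available data. */
	if (data < 0)
		return -ENOMEM;

	return data;
}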

> + if (sk->sk_err != 0 ||
> + (sk->sk_shutdown & RCV_SHUTDOWN) ||
> + (vsk->peer_shutdown & SEND_SHUTDOWN)) {
> + break;
> + }
> +
> + /* Don't wait for non-blocking sockets. */
> + if (timeout == 0) {
> + err = -EAGAIN;
> + break;
> + }
> +
> + if (recv_data) {
> + err = transport->notify_recv_pre_block(vsk, target, 
> recv_data);
> + if (err < 0)
> + break;
> + }
> +
> + release_sock(sk);
> + timeout = schedule_timeout(timeout);
> + lock_sock(sk);
> +
> + if (signal_pending(current)) {
> + err = sock_intr_errno(timeout);
> + break;
> + } else if (timeout == 0) {
> + err = -EAGAIN;
> + break;
> + }
> + }
> +
> + finish_wait(sk_sleep(sk), wait);
> +
> + if (err)
> + return err;
> +
> + /* Internal transport error when checking for available
> +  * data. XXX This should be changed to a connection
> +  * reset in a later change.
> +  */
> + if (data < 0)
> + return -ENOMEM;
> +
> + return data;
> +}
> +
> static int
> vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> int flags)
> @@ -1911,85 +1973,34 @@ vsock_connectible_recvmsg(struct socket *sock, struct 
> msghdr *msg, size_t len,
> 
> 
>   while (1) {
> - s64 ready;
> -
> - prepare_to_wait(sk_sleep(sk), , TASK_INTERRUPTIBLE);
> - ready = vsock_stream_has_data(vsk);
> -
> - if (ready == 0) {
> - if (sk->sk_err != 0 ||
> - (sk->sk_shutdown & RCV_SHUTDOWN) ||
> - (vsk->peer_shutdown & SEND_SHUTDOWN)) {
> - finish_wait(sk_sleep(sk), );
> - break;
> - }
> - /* Don't wait for non-blocking sockets. */
> - if (timeout == 0) {
> - err = -EAGAIN;
> - finish_wait(sk_sleep(sk), );
> - break;
> - }
> + ssize_t read;
> 
> - err = transport->notify_recv_pre_block(
> - vsk, target, _data);
> - if (err < 0) {
> - finish_wait(sk_sleep(sk), );
> - break;
> - }
> - release_sock(sk);
> - timeout = schedule_timeout(timeout);
> - lock_sock(sk);
> + err = vsock_wait_data(sk, , timeout, _data, target);
> + if (err <= 0)
> + break;
> 
> - if (signal_pending(current)) {
> - err = sock_intr_errno(timeout);
> - finish_wait(sk_sleep(sk), );
> - break;
> - } else if (timeout == 0) {
> - err = -EAGAIN;
> - finish_wait(sk_sleep(sk), );
> -  

Re: [RFC PATCH v5 04/19] af_vsock: implement SEQPACKET receive loop

2021-02-25 Thread Jorgen Hansen
On 18 Feb 2021, at 06:37, Arseny Krasnov  wrote:
> 
> This adds receive loop for SEQPACKET. It looks like receive loop for
> STREAM, but there is a little bit difference:
> 1) It doesn't call notify callbacks.
> 2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
>   there is no sense for these values in SEQPACKET case.
> 3) It waits until whole record is received or error is found during
>   receiving.
> 4) It processes and sets 'MSG_TRUNC' flag.
> 
> So to avoid extra conditions for two types of socket inside one loop, two
> independent functions were created.
> 
> Signed-off-by: Arseny Krasnov 
> ---
> include/net/af_vsock.h   |  5 +++
> net/vmw_vsock/af_vsock.c | 97 +++-
> 2 files changed, 101 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> index b1c717286993..01563338cc03 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -135,6 +135,11 @@ struct vsock_transport {
>   bool (*stream_is_active)(struct vsock_sock *);
>   bool (*stream_allow)(u32 cid, u32 port);
> 
> + /* SEQ_PACKET. */
> + size_t (*seqpacket_seq_get_len)(struct vsock_sock *vsk);
> + int (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
> +  int flags, bool *msg_ready);
> +
>   /* Notification. */
>   int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
>   int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index d277dc1cdbdf..b754927a556a 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -1972,6 +1972,98 @@ static int __vsock_stream_recvmsg(struct sock *sk, 
> struct msghdr *msg,
>   return err;
> }
> 
> +static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
> +  size_t len, int flags)
> +{
> + const struct vsock_transport *transport;
> + const struct iovec *orig_iov;
> + unsigned long orig_nr_segs;
> + bool msg_ready;
> + struct vsock_sock *vsk;
> + size_t record_len;
> + long timeout;
> + int err = 0;
> + DEFINE_WAIT(wait);
> +
> + vsk = vsock_sk(sk);
> + transport = vsk->transport;
> +
> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> + orig_nr_segs = msg->msg_iter.nr_segs;
> + orig_iov = msg->msg_iter.iov;
> + msg_ready = false;
> + record_len = 0;
> +
> + while (1) {
> + err = vsock_wait_data(sk, , timeout, NULL, 0);
> +
> + if (err <= 0) {
> + /* In case of any loop break(timeout, signal
> +  * interrupt or shutdown), we report user that
> +  * nothing was copied.
> +  */
> + err = 0;
> + break;
> + }
> +
> + if (record_len == 0) {
> + record_len =
> + transport->seqpacket_seq_get_len(vsk);
> +
> + if (record_len == 0)
> + continue;
> + }
> +
> + err = transport->seqpacket_dequeue(vsk, msg,
> + flags, _ready);
> +
> + if (err < 0) {
> + if (err == -EAGAIN) {
> + iov_iter_init(>msg_iter, READ,
> +   orig_iov, orig_nr_segs,
> +   len);
> + /* Clear 'MSG_EOR' here, because dequeue
> +  * callback above set it again if it was
> +  * set by sender. This 'MSG_EOR' is from
> +  * dropped record.
> +  */
> + msg->msg_flags &= ~MSG_EOR;
> + record_len = 0;
> + continue;
> + }

So a question for my understanding of the flow here. SOCK_SEQPACKET is
reliable, so what does it mean to drop the record? Is the transport supposed
to roll back to the beginning of the current record? If the incoming data in
the transport doesn’t follow the protocol, and packets need to be dropped,
shouldn’t the socket be reset or similar? Maybe there is potential for
simplifying the flow if that is the case.

> +
> + err = -ENOMEM;
> + break;
> + }
> +
> + if (msg_ready)
> + break;
> + }
> +
> + if (sk->sk_err)
> + err = -sk->sk_err;
> + else if (sk->sk_shutdown & RCV_SHUTDOWN)
> + err = 0;
> +
> + if (msg_ready) {
> + /* User sets MSG_TRUNC, so return real length of
> +  * packet.
> +  */
> + if (flags & MSG_TRUNC)
> + err = 

Re: [RFC PATCH v4 02/17] af_vsock: separate wait data loop

2021-02-11 Thread Jorgen Hansen


> On 7 Feb 2021, at 16:14, Arseny Krasnov  wrote:
> 
> This moves wait loop for data to dedicated function, because later
> it will be used by SEQPACKET data receive loop.
> 
> Signed-off-by: Arseny Krasnov 
> ---
> net/vmw_vsock/af_vsock.c | 158 +--
> 1 file changed, 86 insertions(+), 72 deletions(-)
> 
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index f4fabec50650..38927695786f 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -1833,6 +1833,71 @@ static int vsock_connectible_sendmsg(struct socket 
> *sock, struct msghdr *msg,
>   return err;
> }
> 
> +static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
> +long timeout,
> +struct vsock_transport_recv_notify_data *recv_data,
> +size_t target)
> +{
> + const struct vsock_transport *transport;
> + struct vsock_sock *vsk;
> + s64 data;
> + int err;
> +
> + vsk = vsock_sk(sk);
> + err = 0;
> + transport = vsk->transport;
> + prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
> +
> + while ((data = vsock_stream_has_data(vsk)) == 0) {
> + if (sk->sk_err != 0 ||
> + (sk->sk_shutdown & RCV_SHUTDOWN) ||
> + (vsk->peer_shutdown & SEND_SHUTDOWN)) {
> + goto out;
> + }
> +
> + /* Don't wait for non-blocking sockets. */
> + if (timeout == 0) {
> + err = -EAGAIN;
> + goto out;
> + }
> +
> + if (recv_data) {
> + err = transport->notify_recv_pre_block(vsk, target, 
> recv_data);
> + if (err < 0)
> + goto out;
> + }
> +
> + release_sock(sk);
> + timeout = schedule_timeout(timeout);
> + lock_sock(sk);
> +
> + if (signal_pending(current)) {
> + err = sock_intr_errno(timeout);
> + goto out;
> + } else if (timeout == 0) {
> + err = -EAGAIN;
> + goto out;
> + }
> + }
> +
> + finish_wait(sk_sleep(sk), wait);
> +
> + /* Invalid queue pair content. XXX This should
> +  * be changed to a connection reset in a later
> +  * change.
> +  */

Since you are here, could you update this comment to something like:

/* Internal transport error when checking for available
 * data. XXX This should be changed to a connection
 * reset in a later change.
 */

> + if (data < 0)
> + return -ENOMEM;
> +
> + /* Have some data, return. */
> + if (data)
> + return data;
> +
> +out:
> + finish_wait(sk_sleep(sk), wait);
> + return err;
> +}

I agree with Stefano's suggestion to get rid of the out: part and just have a
single finish_wait().
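
Something along these lines, perhaps (untested sketch, only meant to show the
single exit point; same helpers as in the patch above):

static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
                           long timeout,
                           struct vsock_transport_recv_notify_data *recv_data,
                           size_t target)
{
        const struct vsock_transport *transport;
        struct vsock_sock *vsk;
        s64 data;
        int err = 0;

        vsk = vsock_sk(sk);
        transport = vsk->transport;
        prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);

        while ((data = vsock_stream_has_data(vsk)) == 0) {
                if (sk->sk_err != 0 ||
                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
                    (vsk->peer_shutdown & SEND_SHUTDOWN))
                        break;

                /* Don't wait for non-blocking sockets. */
                if (timeout == 0) {
                        err = -EAGAIN;
                        break;
                }

                if (recv_data) {
                        err = transport->notify_recv_pre_block(vsk, target, recv_data);
                        if (err < 0)
                                break;
                }

                release_sock(sk);
                timeout = schedule_timeout(timeout);
                lock_sock(sk);

                if (signal_pending(current)) {
                        err = sock_intr_errno(timeout);
                        break;
                } else if (timeout == 0) {
                        err = -EAGAIN;
                        break;
                }
        }

        /* Every exit path falls through here, so one finish_wait() suffices. */
        finish_wait(sk_sleep(sk), wait);

        if (err)
                return err;

        /* Internal transport error when checking for available data. */
        if (data < 0)
                return -ENOMEM;

        return data;
}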

> +
> static int
> vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> int flags)
> @@ -1912,85 +1977,34 @@ vsock_connectible_recvmsg(struct socket *sock, struct 
> msghdr *msg, size_t len,
> 
> 
>   while (1) {
> - s64 ready;
> + ssize_t read;
> 
> - prepare_to_wait(sk_sleep(sk), , TASK_INTERRUPTIBLE);
> - ready = vsock_stream_has_data(vsk);
> -
> - if (ready == 0) {
> - if (sk->sk_err != 0 ||
> - (sk->sk_shutdown & RCV_SHUTDOWN) ||
> - (vsk->peer_shutdown & SEND_SHUTDOWN)) {
> - finish_wait(sk_sleep(sk), );
> - break;
> - }
> - /* Don't wait for non-blocking sockets. */
> - if (timeout == 0) {
> - err = -EAGAIN;
> - finish_wait(sk_sleep(sk), );
> - break;
> - }
> -
> - err = transport->notify_recv_pre_block(
> - vsk, target, _data);
> - if (err < 0) {
> - finish_wait(sk_sleep(sk), );
> - break;
> - }
> - release_sock(sk);
> - timeout = schedule_timeout(timeout);
> - lock_sock(sk);
> -
> - if (signal_pending(current)) {
> - err = sock_intr_errno(timeout);
> - finish_wait(sk_sleep(sk), );
> - break;
> - } else if (timeout == 0) {
> - err = -EAGAIN;
> - finish_wait(sk_sleep(sk), );
> - break;
> - }
> - } else {
> - 

[PATCH v2 3/3] VMCI: Enforce queuepair max size for IOCTL_VMCI_QUEUEPAIR_ALLOC

2021-01-20 Thread Jorgen Hansen
When creating the VMCI queue pair tracking data structures on the host
side, the IOCTL for creating the VMCI queue pair didn't validate the
queue pair size parameters. This change adds the missing checks.

This avoids a memory allocation issue in qp_host_alloc_queue, as
reported by nslusa...@gmx.net. The check in qp_host_alloc_queue
has also been updated to enforce the maximum queue pair size
as defined by VMCI_MAX_GUEST_QP_MEMORY.
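
For intuition, the combined check also rejects unsigned wrap-around: if the
sum of the two queue sizes overflows, it ends up smaller than the larger of
the two operands. A rough user-space illustration of the rule (hypothetical
helper, not the kernel macro itself):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_GUEST_QP_MEMORY ((size_t)(128 * 1024 * 1024))

static bool qp_sizes_are_valid(size_t produce, size_t consume)
{
        size_t larger = produce > consume ? produce : consume;

        return produce + consume >= larger &&
               produce + consume <= MAX_GUEST_QP_MEMORY;
}

int main(void)
{
        printf("%d\n", qp_sizes_are_valid(65536, 65536));           /* 1: valid */
        printf("%d\n", qp_sizes_are_valid(SIZE_MAX, 2));            /* 0: wraps */
        printf("%d\n", qp_sizes_are_valid(1, MAX_GUEST_QP_MEMORY)); /* 0: too big */
        return 0;
}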

The fix has been verified using sample code supplied by
nslusa...@gmx.net.

Reported-by: nslusa...@gmx.net
Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 12 
 include/linux/vmw_vmci_defs.h   |  4 ++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c 
b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 525ef96..d787dde 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -237,7 +237,9 @@ static struct qp_list qp_guest_endpoints = {
 #define QPE_NUM_PAGES(_QPE) ((u32) \
 (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
  DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
-
+#define QP_SIZES_ARE_VALID(_prod_qsize, _cons_qsize) \
+   ((_prod_qsize) + (_cons_qsize) >= max(_prod_qsize, _cons_qsize) && \
+(_prod_qsize) + (_cons_qsize) <= VMCI_MAX_GUEST_QP_MEMORY)
 
 /*
  * Frees kernel VA space for a given queue and its queue header, and
@@ -528,7 +530,7 @@ static struct vmci_queue *qp_host_alloc_queue(u64 size)
u64 num_pages;
const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
 
-   if (size > SIZE_MAX - PAGE_SIZE)
+   if (size > min_t(size_t, VMCI_MAX_GUEST_QP_MEMORY, SIZE_MAX - 
PAGE_SIZE))
return NULL;
num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
if (num_pages > (SIZE_MAX - queue_size) /
@@ -1929,6 +1931,9 @@ int vmci_qp_broker_alloc(struct vmci_handle handle,
 struct vmci_qp_page_store *page_store,
 struct vmci_ctx *context)
 {
+   if (!QP_SIZES_ARE_VALID(produce_size, consume_size))
+   return VMCI_ERROR_NO_RESOURCES;
+
return qp_broker_alloc(handle, peer, flags, priv_flags,
   produce_size, consume_size,
   page_store, context, NULL, NULL, NULL, NULL);
@@ -2685,8 +2690,7 @@ int vmci_qpair_alloc(struct vmci_qp **qpair,
 * used by the device is NO_RESOURCES, so use that here too.
 */
 
-   if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
-   produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
+   if (!QP_SIZES_ARE_VALID(produce_qsize, consume_qsize))
return VMCI_ERROR_NO_RESOURCES;
 
retval = vmci_route(, , false, );
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index be0afe6..e36cb11 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -66,7 +66,7 @@ enum {
  * consists of at least two pages, the memory limit also dictates the
  * number of queue pairs a guest can create.
  */
-#define VMCI_MAX_GUEST_QP_MEMORY (128 * 1024 * 1024)
+#define VMCI_MAX_GUEST_QP_MEMORY ((size_t)(128 * 1024 * 1024))
 #define VMCI_MAX_GUEST_QP_COUNT  (VMCI_MAX_GUEST_QP_MEMORY / PAGE_SIZE / 2)
 
 /*
@@ -80,7 +80,7 @@ enum {
  * too much kernel memory (especially on vmkernel).  We limit a queuepair to
  * 32 KB, or 16 KB per queue for symmetrical pairs.
  */
-#define VMCI_MAX_PINNED_QP_MEMORY (32 * 1024)
+#define VMCI_MAX_PINNED_QP_MEMORY ((size_t)(32 * 1024))
 
 /*
  * We have a fixed set of resource IDs available in the VMX.
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v2 2/3] VMCI: Use set_page_dirty_lock() when unregistering guest memory

2021-01-20 Thread Jorgen Hansen
When the VMCI host support releases guest memory in the case where
the VM was killed, the pinned guest pages aren't locked. Use
set_page_dirty_lock() instead of set_page_dirty().

Testing done: Killed VM while having an active VMCI based vSocket
connection and observed warning from ext4. With this fix, no
warning was observed. Ran various vSocket tests without issues.

Fixes: 06164d2b72aa ("VMCI: queue pairs implementation.")
Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c 
b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index a3691c1..525ef96 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -630,7 +630,7 @@ static void qp_release_pages(struct page **pages,
 
for (i = 0; i < num_pages; i++) {
if (dirty)
-   set_page_dirty(pages[i]);
+   set_page_dirty_lock(pages[i]);
 
put_page(pages[i]);
pages[i] = NULL;
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v2 1/3] VMCI: Stop log spew when qp allocation isn't possible

2021-01-20 Thread Jorgen Hansen
VMCI queue pair allocation is disabled if a VM is in FT mode. In
these cases, VMware Tools may still occasionally attempt to create
a vSocket stream connection, resulting in multiple warnings in the
kernel logs. Therefore, downgrade the error log to a debug log.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c 
b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index c490658..a3691c1 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -1207,7 +1207,7 @@ static int qp_alloc_guest_work(struct vmci_handle *handle,
} else {
result = qp_alloc_hypercall(queue_pair_entry);
if (result < VMCI_SUCCESS) {
-   pr_warn("qp_alloc_hypercall result = %d\n", result);
+   pr_devel("qp_alloc_hypercall result = %d\n", result);
goto error;
}
}
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v2 0/3] VMCI: Queue pair bug fixes

2021-01-20 Thread Jorgen Hansen
This series contains three bug fixes for the queue pair
implementation in the VMCI driver.

v1 -> v2:
  - format patches as a series
  - use min_t instead of min to ensure size_t comparison
(issue pointed out by kernel test robot )

Jorgen Hansen (3):
  VMCI: Stop log spew when qp allocation isn't possible
  VMCI: Use set_page_dirty_lock() when unregistering guest memory
  VMCI: Enforce queuepair max size for IOCTL_VMCI_QUEUEPAIR_ALLOC

 drivers/misc/vmw_vmci/vmci_queue_pair.c | 16 ++--
 include/linux/vmw_vmci_defs.h   |  4 ++--
 2 files changed, 12 insertions(+), 8 deletions(-)

-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] VMCI: Enforce queuepair max size for IOCTL_VMCI_QUEUEPAIR_ALLOC

2021-01-11 Thread Jorgen Hansen
On 11 Jan 2021, at 13:46, Greg KH  wrote:
> 
> On Mon, Jan 11, 2021 at 04:18:53AM -0800, Jorgen Hansen wrote:
>> When create the VMCI queue pair tracking data structures on the host
>> side, the IOCTL for creating the VMCI queue pair didn't validate
>> the queue pair size parameters. This change adds checks for this.
>> 
>> This avoids a memory allocation issue in qp_host_alloc_queue, as
>> reported by nslusa...@gmx.net. The check in qp_host_alloc_queue
>> has also been updated to enforce the maximum queue pair size
>> as defined by VMCI_MAX_GUEST_QP_MEMORY.
>> 
>> The fix has been verified using sample code supplied by
>> nslusa...@gmx.net.
>> 
>> Reported-by: nslusa...@gmx.net
>> Reviewed-by: Vishnu Dasa 
>> Signed-off-by: Jorgen Hansen 
>> ---
>> drivers/misc/vmw_vmci/vmci_queue_pair.c | 12 
>> include/linux/vmw_vmci_defs.h   |  4 ++--
>> 2 files changed, 10 insertions(+), 6 deletions(-)
> 
> Hi,
> 
> This is the friendly patch-bot of Greg Kroah-Hartman.  You have sent him
> a patch that has triggered this response.  He used to manually respond
> to these common problems, but in order to save his sanity (he kept
> writing the same thing over and over, yet to different people), I was
> created.  Hopefully you will not take offence and will fix the problem
> in your patch and resubmit it so that it can be accepted into the Linux
> kernel tree.
> 
> You are receiving this message because of the following common error(s)
> as indicated below:
> 
> - You sent multiple patches, yet no indication of which ones should be
>  applied in which order.  Greg could just guess, but if you are
>  receiving this email, he guessed wrong and the patches didn't apply.
>  Please read the section entitled "The canonical patch format" in the
>  kernel file, Documentation/SubmittingPatches for a description of how
>  to do this so that Greg has a chance to apply these correctly.
> 
> 
> If you wish to discuss this problem further, or you have questions about
> how to resolve this issue, please feel free to respond to this email and
> Greg will reply once he has dug out from the pending patches received
> from other developers.
> 
> thanks,
> 
> greg k-h's patch email bot

Hi,

The patches are independent and should be able to apply in any order;
I didn’t see any problems when applying them in different orders locally.

I’m hesitant to submit the patches as a series, since one of them:
 VMCI: Use set_page_dirty_lock() when unregistering guest memory
is marked as fixing an original checkin and should be considered for
backporting, whereas the others are either not important to backport
or rely on other recent changes. However, if formatting them as a
series of misc fixes is preferred, I’ll do that.

Thanks,
Jorgen
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH] VMCI: Use set_page_dirty_lock() when unregistering guest memory

2021-01-11 Thread Jorgen Hansen
When the VMCI host support releases guest memory in the case where
the VM was killed, the pinned guest pages aren't locked. Use
set_page_dirty_lock() instead of set_page_dirty().

Testing done: Killed VM while having an active VMCI based vSocket
connection and observed warning from ext4. With this fix, no
warning was observed. Ran various vSocket tests without issues.

Fixes: 06164d2b72aa ("VMCI: queue pairs implementation.")
Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c 
b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index a3691c1..525ef96 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -630,7 +630,7 @@ static void qp_release_pages(struct page **pages,
 
for (i = 0; i < num_pages; i++) {
if (dirty)
-   set_page_dirty(pages[i]);
+   set_page_dirty_lock(pages[i]);
 
put_page(pages[i]);
pages[i] = NULL;
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VMCI: Stop log spew when qp allocation isn't possible

2021-01-11 Thread Jorgen Hansen
VMCI queue pair allocation is disabled if a VM is in FT mode. In
these cases, VMware Tools may still occasionally attempt to create
a vSocket stream connection, resulting in multiple warnings in the
kernel logs. Therefore, downgrade the error log to a debug log.

Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c 
b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index c490658..a3691c1 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -1207,7 +1207,7 @@ static int qp_alloc_guest_work(struct vmci_handle *handle,
} else {
result = qp_alloc_hypercall(queue_pair_entry);
if (result < VMCI_SUCCESS) {
-   pr_warn("qp_alloc_hypercall result = %d\n", result);
+   pr_devel("qp_alloc_hypercall result = %d\n", result);
goto error;
}
}
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VMCI: Enforce queuepair max size for IOCTL_VMCI_QUEUEPAIR_ALLOC

2021-01-11 Thread Jorgen Hansen
When creating the VMCI queue pair tracking data structures on the host
side, the IOCTL for creating the VMCI queue pair didn't validate the
queue pair size parameters. This change adds the missing checks.

This avoids a memory allocation issue in qp_host_alloc_queue, as
reported by nslusa...@gmx.net. The check in qp_host_alloc_queue
has also been updated to enforce the maximum queue pair size
as defined by VMCI_MAX_GUEST_QP_MEMORY.

The fix has been verified using sample code supplied by
nslusa...@gmx.net.

Reported-by: nslusa...@gmx.net
Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 12 
 include/linux/vmw_vmci_defs.h   |  4 ++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c 
b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 525ef96..39d2f191 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -237,7 +237,9 @@ static struct qp_list qp_guest_endpoints = {
 #define QPE_NUM_PAGES(_QPE) ((u32) \
 (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
  DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
-
+#define QP_SIZES_ARE_VALID(_prod_qsize, _cons_qsize) \
+   ((_prod_qsize) + (_cons_qsize) >= max(_prod_qsize, _cons_qsize) && \
+(_prod_qsize) + (_cons_qsize) <= VMCI_MAX_GUEST_QP_MEMORY)
 
 /*
  * Frees kernel VA space for a given queue and its queue header, and
@@ -528,7 +530,7 @@ static struct vmci_queue *qp_host_alloc_queue(u64 size)
u64 num_pages;
const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
 
-   if (size > SIZE_MAX - PAGE_SIZE)
+   if (size > min(VMCI_MAX_GUEST_QP_MEMORY, SIZE_MAX - PAGE_SIZE))
return NULL;
num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
if (num_pages > (SIZE_MAX - queue_size) /
@@ -1929,6 +1931,9 @@ int vmci_qp_broker_alloc(struct vmci_handle handle,
 struct vmci_qp_page_store *page_store,
 struct vmci_ctx *context)
 {
+   if (!QP_SIZES_ARE_VALID(produce_size, consume_size))
+   return VMCI_ERROR_NO_RESOURCES;
+
return qp_broker_alloc(handle, peer, flags, priv_flags,
   produce_size, consume_size,
   page_store, context, NULL, NULL, NULL, NULL);
@@ -2685,8 +2690,7 @@ int vmci_qpair_alloc(struct vmci_qp **qpair,
 * used by the device is NO_RESOURCES, so use that here too.
 */
 
-   if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
-   produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
+   if (!QP_SIZES_ARE_VALID(produce_qsize, consume_qsize))
return VMCI_ERROR_NO_RESOURCES;
 
retval = vmci_route(, , false, );
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index be0afe6..e36cb11 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -66,7 +66,7 @@ enum {
  * consists of at least two pages, the memory limit also dictates the
  * number of queue pairs a guest can create.
  */
-#define VMCI_MAX_GUEST_QP_MEMORY (128 * 1024 * 1024)
+#define VMCI_MAX_GUEST_QP_MEMORY ((size_t)(128 * 1024 * 1024))
 #define VMCI_MAX_GUEST_QP_COUNT  (VMCI_MAX_GUEST_QP_MEMORY / PAGE_SIZE / 2)
 
 /*
@@ -80,7 +80,7 @@ enum {
  * too much kernel memory (especially on vmkernel).  We limit a queuepair to
  * 32 KB, or 16 KB per queue for symmetrical pairs.
  */
-#define VMCI_MAX_PINNED_QP_MEMORY (32 * 1024)
+#define VMCI_MAX_PINNED_QP_MEMORY ((size_t)(32 * 1024))
 
 /*
  * We have a fixed set of resource IDs available in the VMX.
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 3/6] vsock: add local transport support in the vsock core

2019-11-21 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Thursday, November 21, 2019 5:13 PM
> 
> On Thu, Nov 21, 2019 at 03:53:47PM +0000, Jorgen Hansen wrote:
> > > From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> > > Sent: Thursday, November 21, 2019 4:22 PM
> > >
> > > On Thu, Nov 21, 2019 at 03:04:18PM +, Jorgen Hansen wrote:
> > > > > From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> > > > > Sent: Tuesday, November 19, 2019 12:01 PM
> > > > > To: net...@vger.kernel.org
> > > > >
> > > > > This patch allows to register a transport able to handle
> > > > > local communication (loopback).
> > > > >
> > > > > Signed-off-by: Stefano Garzarella 
> > > > > ---
> > > > >  include/net/af_vsock.h   |  2 ++
> > > > >  net/vmw_vsock/af_vsock.c | 17 -
> > > > >  2 files changed, 18 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > > > > index 4206dc6d813f..b1c717286993 100644
> > > > > --- a/include/net/af_vsock.h
> > > > > +++ b/include/net/af_vsock.h
> > > > > @@ -98,6 +98,8 @@ struct vsock_transport_send_notify_data {
> > > > >  #define VSOCK_TRANSPORT_F_G2H0x0002
> > > > >  /* Transport provides DGRAM communication */
> > > > >  #define VSOCK_TRANSPORT_F_DGRAM  0x0004
> > > > > +/* Transport provides local (loopback) communication */
> > > > > +#define VSOCK_TRANSPORT_F_LOCAL  0x0008
> > > > >
> > > > >  struct vsock_transport {
> > > > >   struct module *module;
> > > > > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> > > > > index cc8659838bf2..c9e5bad59dc1 100644
> > > > > --- a/net/vmw_vsock/af_vsock.c
> > > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > > @@ -136,6 +136,8 @@ static const struct vsock_transport
> > > *transport_h2g;
> > > > >  static const struct vsock_transport *transport_g2h;
> > > > >  /* Transport used for DGRAM communication */
> > > > >  static const struct vsock_transport *transport_dgram;
> > > > > +/* Transport used for local communication */
> > > > > +static const struct vsock_transport *transport_local;
> > > > >  static DEFINE_MUTEX(vsock_register_mutex);
> > > > >
> > > > >  / UTILS /
> > > > > @@ -2130,7 +2132,7 @@
> > > EXPORT_SYMBOL_GPL(vsock_core_get_transport);
> > > > >
> > > > >  int vsock_core_register(const struct vsock_transport *t, int 
> > > > > features)
> > > > >  {
> > > > > - const struct vsock_transport *t_h2g, *t_g2h, *t_dgram;
> > > > > + const struct vsock_transport *t_h2g, *t_g2h, *t_dgram,
> *t_local;
> > > > >   int err = mutex_lock_interruptible(_register_mutex);
> > > > >
> > > > >   if (err)
> > > > > @@ -2139,6 +2141,7 @@ int vsock_core_register(const struct
> > > > > vsock_transport *t, int features)
> > > > >   t_h2g = transport_h2g;
> > > > >   t_g2h = transport_g2h;
> > > > >   t_dgram = transport_dgram;
> > > > > + t_local = transport_local;
> > > > >
> > > > >   if (features & VSOCK_TRANSPORT_F_H2G) {
> > > > >   if (t_h2g) {
> > > > > @@ -2164,9 +2167,18 @@ int vsock_core_register(const struct
> > > > > vsock_transport *t, int features)
> > > > >   t_dgram = t;
> > > > >   }
> > > > >
> > > > > + if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > > + if (t_local) {
> > > > > + err = -EBUSY;
> > > > > + goto err_busy;
> > > > > + }
> > > > > + t_local = t;
> > > > > + }
> > > > > +
> > > > >   transport_h2g = t_h2g;
> > > > >   transport_g2h = t_g2h;
> > > > >   transport_dgram = t_dgram;
> > > > > + transport_local = t_local;
> > > > >
> > > > >  err_busy:
> > > > >  

RE: [PATCH net-next 3/6] vsock: add local transport support in the vsock core

2019-11-21 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Thursday, November 21, 2019 4:22 PM
> 
> On Thu, Nov 21, 2019 at 03:04:18PM +0000, Jorgen Hansen wrote:
> > > From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> > > Sent: Tuesday, November 19, 2019 12:01 PM
> > > To: net...@vger.kernel.org
> > >
> > > This patch allows to register a transport able to handle
> > > local communication (loopback).
> > >
> > > Signed-off-by: Stefano Garzarella 
> > > ---
> > >  include/net/af_vsock.h   |  2 ++
> > >  net/vmw_vsock/af_vsock.c | 17 -
> > >  2 files changed, 18 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > > index 4206dc6d813f..b1c717286993 100644
> > > --- a/include/net/af_vsock.h
> > > +++ b/include/net/af_vsock.h
> > > @@ -98,6 +98,8 @@ struct vsock_transport_send_notify_data {
> > >  #define VSOCK_TRANSPORT_F_G2H0x0002
> > >  /* Transport provides DGRAM communication */
> > >  #define VSOCK_TRANSPORT_F_DGRAM  0x0004
> > > +/* Transport provides local (loopback) communication */
> > > +#define VSOCK_TRANSPORT_F_LOCAL  0x0008
> > >
> > >  struct vsock_transport {
> > >   struct module *module;
> > > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> > > index cc8659838bf2..c9e5bad59dc1 100644
> > > --- a/net/vmw_vsock/af_vsock.c
> > > +++ b/net/vmw_vsock/af_vsock.c
> > > @@ -136,6 +136,8 @@ static const struct vsock_transport
> *transport_h2g;
> > >  static const struct vsock_transport *transport_g2h;
> > >  /* Transport used for DGRAM communication */
> > >  static const struct vsock_transport *transport_dgram;
> > > +/* Transport used for local communication */
> > > +static const struct vsock_transport *transport_local;
> > >  static DEFINE_MUTEX(vsock_register_mutex);
> > >
> > >  / UTILS /
> > > @@ -2130,7 +2132,7 @@
> EXPORT_SYMBOL_GPL(vsock_core_get_transport);
> > >
> > >  int vsock_core_register(const struct vsock_transport *t, int features)
> > >  {
> > > - const struct vsock_transport *t_h2g, *t_g2h, *t_dgram;
> > > + const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local;
> > >   int err = mutex_lock_interruptible(_register_mutex);
> > >
> > >   if (err)
> > > @@ -2139,6 +2141,7 @@ int vsock_core_register(const struct
> > > vsock_transport *t, int features)
> > >   t_h2g = transport_h2g;
> > >   t_g2h = transport_g2h;
> > >   t_dgram = transport_dgram;
> > > + t_local = transport_local;
> > >
> > >   if (features & VSOCK_TRANSPORT_F_H2G) {
> > >   if (t_h2g) {
> > > @@ -2164,9 +2167,18 @@ int vsock_core_register(const struct
> > > vsock_transport *t, int features)
> > >   t_dgram = t;
> > >   }
> > >
> > > + if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > + if (t_local) {
> > > + err = -EBUSY;
> > > + goto err_busy;
> > > + }
> > > + t_local = t;
> > > + }
> > > +
> > >   transport_h2g = t_h2g;
> > >   transport_g2h = t_g2h;
> > >   transport_dgram = t_dgram;
> > > + transport_local = t_local;
> > >
> > >  err_busy:
> > >   mutex_unlock(_register_mutex);
> > > @@ -2187,6 +2199,9 @@ void vsock_core_unregister(const struct
> > > vsock_transport *t)
> > >   if (transport_dgram == t)
> > >   transport_dgram = NULL;
> > >
> > > + if (transport_local == t)
> > > + transport_local = NULL;
> > > +
> > >   mutex_unlock(_register_mutex);
> > >  }
> > >  EXPORT_SYMBOL_GPL(vsock_core_unregister);
> > > --
> > > 2.21.0
> >
> > Having loopback support as a separate transport fits nicely, but do we need
> to support
> > different variants of loopback? It could just be built in.
> 
> I agree with you, indeed initially I developed it as built in, but
> DEPMOD found a cyclic dependency because vsock_transport use
> virtio_transport_common that use vsock, so if I include vsock_transport
> in the vsock module, DEPMOD is not happy.
> 
> I don't know how to break this cyclic dependency, do you have any ideas?

One way to view this would be that the loopback transport and the support
it uses from virtio_transport_common are independent of virtio as such,
and could be part of the af_vsock module if loopback is configured. So
in a way, the virtio g2h and h2g transports would be extensions of the
built-in loopback transport. But that brings in quite a bit of code, so
it could be better to just keep it as is.

Thanks,
Jorgen
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 2/6] vsock: add VMADDR_CID_LOCAL definition

2019-11-21 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Tuesday, November 19, 2019 12:01 PM
> 
> The VMADDR_CID_RESERVED (1) was used by VMCI, but now it is not
> used anymore, so we can reuse it for local communication
> (loopback) adding the new well-know CID: VMADDR_CID_LOCAL.
> 
> Cc: Jorgen Hansen 
> Signed-off-by: Stefano Garzarella 
> ---
>  include/uapi/linux/vm_sockets.h | 8 +---
>  net/vmw_vsock/vmci_transport.c  | 2 +-
>  2 files changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/include/uapi/linux/vm_sockets.h
> b/include/uapi/linux/vm_sockets.h
> index 68d57c5e99bc..fd0ed7221645 100644
> --- a/include/uapi/linux/vm_sockets.h
> +++ b/include/uapi/linux/vm_sockets.h
> @@ -99,11 +99,13 @@
> 
>  #define VMADDR_CID_HYPERVISOR 0
> 
> -/* This CID is specific to VMCI and can be considered reserved (even VMCI
> - * doesn't use it anymore, it's a legacy value from an older release).
> +/* Use this as the destination CID in an address when referring to the
> + * local communication (loopback).
> + * (This was VMADDR_CID_RESERVED, but even VMCI doesn't use it
> anymore,
> + * it was a legacy value from an older release).
>   */
> 
> -#define VMADDR_CID_RESERVED 1
> +#define VMADDR_CID_LOCAL 1
> 
>  /* Use this as the destination CID in an address when referring to the host
>   * (any process other than the hypervisor).  VMCI relies on it being 2, but
> diff --git a/net/vmw_vsock/vmci_transport.c
> b/net/vmw_vsock/vmci_transport.c
> index 644d32e43d23..4b8b1150a738 100644
> --- a/net/vmw_vsock/vmci_transport.c
> +++ b/net/vmw_vsock/vmci_transport.c
> @@ -648,7 +648,7 @@ static int vmci_transport_recv_dgram_cb(void *data,
> struct vmci_datagram *dg)
>  static bool vmci_transport_stream_allow(u32 cid, u32 port)
>  {
>   static const u32 non_socket_contexts[] = {
> - VMADDR_CID_RESERVED,
> + VMADDR_CID_LOCAL,
>   };
>   int i;
> 
> --
> 2.21.0

Reviewed-by: Jorgen Hansen 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 0/6] vsock: add local transport support

2019-11-21 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Tuesday, November 19, 2019 12:01 PM
> This series introduces a new transport (vsock_loopback) to handle
> local communication.
> This could be useful to test vsock core itself and to allow developers
> to test their applications without launching a VM.
> 
> Before this series, vmci and virtio transports allowed this behavior,
> but only in the guest.
> We are moving the loopback handling in a new transport, because it
> might be useful to provide this feature also in the host or when
> no H2G/G2H transports (hyperv, virtio, vmci) are loaded.
> 
> The user can use the loopback with the new VMADDR_CID_LOCAL (that
> replaces VMADDR_CID_RESERVED) in any condition.
> Otherwise, if the G2H transport is loaded, it can also use the guest
> local CID as previously supported by vmci and virtio transports.
> If G2H transport is not loaded, the user can also use VMADDR_CID_HOST
> for local communication.
> 
> Patch 1 is a cleanup to build virtio_transport_common without virtio
> Patch 2 adds the new VMADDR_CID_LOCAL, replacing
> VMADDR_CID_RESERVED
> Patch 3 adds a new feature flag to register a loopback transport
> Patch 4 adds the new vsock_loopback transport based on the loopback
> implementation of virtio_transport
> Patch 5 implements the logic to use the local transport for loopback
> communication
> Patch 6 removes the loopback from virtio_transport
> 
> @Jorgen: Do you think it might be a problem to replace
> VMADDR_CID_RESERVED with VMADDR_CID_LOCAL?

No, that should be fine. It has never been allowed for use with stream
sockets in AF_VSOCK. The only potential use would be for datagram sockets,
but that side appears to be unaffected by your changes, since loopback is
only introduced for SOCK_STREAM.

> 
> Thanks,
> Stefano
> 
> Stefano Garzarella (6):
>   vsock/virtio_transport_common: remove unused virtio header includes
>   vsock: add VMADDR_CID_LOCAL definition
>   vsock: add local transport support in the vsock core
>   vsock: add vsock_loopback transport
>   vsock: use local transport when it is loaded
>   vsock/virtio: remove loopback handling
> 
>  MAINTAINERS |   1 +
>  include/net/af_vsock.h  |   2 +
>  include/uapi/linux/vm_sockets.h |   8 +-
>  net/vmw_vsock/Kconfig   |  12 ++
>  net/vmw_vsock/Makefile  |   1 +
>  net/vmw_vsock/af_vsock.c|  49 +-
>  net/vmw_vsock/virtio_transport.c|  61 +--
>  net/vmw_vsock/virtio_transport_common.c |   3 -
>  net/vmw_vsock/vmci_transport.c  |   2 +-
>  net/vmw_vsock/vsock_loopback.c  | 217
> 
>  10 files changed, 283 insertions(+), 73 deletions(-)
>  create mode 100644 net/vmw_vsock/vsock_loopback.c
> 
> --
> 2.21.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 3/6] vsock: add local transport support in the vsock core

2019-11-21 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Tuesday, November 19, 2019 12:01 PM
> To: net...@vger.kernel.org
>
> This patch allows to register a transport able to handle
> local communication (loopback).
> 
> Signed-off-by: Stefano Garzarella 
> ---
>  include/net/af_vsock.h   |  2 ++
>  net/vmw_vsock/af_vsock.c | 17 -
>  2 files changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> index 4206dc6d813f..b1c717286993 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -98,6 +98,8 @@ struct vsock_transport_send_notify_data {
>  #define VSOCK_TRANSPORT_F_G2H0x0002
>  /* Transport provides DGRAM communication */
>  #define VSOCK_TRANSPORT_F_DGRAM  0x0004
> +/* Transport provides local (loopback) communication */
> +#define VSOCK_TRANSPORT_F_LOCAL  0x0008
> 
>  struct vsock_transport {
>   struct module *module;
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index cc8659838bf2..c9e5bad59dc1 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -136,6 +136,8 @@ static const struct vsock_transport *transport_h2g;
>  static const struct vsock_transport *transport_g2h;
>  /* Transport used for DGRAM communication */
>  static const struct vsock_transport *transport_dgram;
> +/* Transport used for local communication */
> +static const struct vsock_transport *transport_local;
>  static DEFINE_MUTEX(vsock_register_mutex);
> 
>  / UTILS /
> @@ -2130,7 +2132,7 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport);
> 
>  int vsock_core_register(const struct vsock_transport *t, int features)
>  {
> - const struct vsock_transport *t_h2g, *t_g2h, *t_dgram;
> + const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local;
>   int err = mutex_lock_interruptible(_register_mutex);
> 
>   if (err)
> @@ -2139,6 +2141,7 @@ int vsock_core_register(const struct
> vsock_transport *t, int features)
>   t_h2g = transport_h2g;
>   t_g2h = transport_g2h;
>   t_dgram = transport_dgram;
> + t_local = transport_local;
> 
>   if (features & VSOCK_TRANSPORT_F_H2G) {
>   if (t_h2g) {
> @@ -2164,9 +2167,18 @@ int vsock_core_register(const struct
> vsock_transport *t, int features)
>   t_dgram = t;
>   }
> 
> + if (features & VSOCK_TRANSPORT_F_LOCAL) {
> + if (t_local) {
> + err = -EBUSY;
> + goto err_busy;
> + }
> + t_local = t;
> + }
> +
>   transport_h2g = t_h2g;
>   transport_g2h = t_g2h;
>   transport_dgram = t_dgram;
> + transport_local = t_local;
> 
>  err_busy:
>   mutex_unlock(_register_mutex);
> @@ -2187,6 +2199,9 @@ void vsock_core_unregister(const struct
> vsock_transport *t)
>   if (transport_dgram == t)
>   transport_dgram = NULL;
> 
> + if (transport_local == t)
> + transport_local = NULL;
> +
>   mutex_unlock(_register_mutex);
>  }
>  EXPORT_SYMBOL_GPL(vsock_core_unregister);
> --
> 2.21.0

Having loopback support as a separate transport fits nicely, but do we need
to support different variants of loopback? It could just be built in.

Thanks,
Jorgen
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 11/14] vsock: add multi-transports support

2019-11-13 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Tuesday, November 12, 2019 11:37 AM

> > > > You already mentioned that you are working on a fix for loopback
> > > > here for the guest, but presumably a host could also do loopback.
> > >
> > > IIUC we don't support loopback in the host, because in this case the
> > > application will use the CID_HOST as address, but if we are in a nested
> > > VM environment we are in trouble.
> >
> > If both src and dst CID are CID_HOST, we should be fairly sure that this
> > Is host loopback, no? If src is anything else, we would do G2H.
> >
> 
> The problem is that we don't know the src until we assign a transport
> looking at the dst. (or if the user bound the socket to CID_HOST before
> the connect(), but it is not very common)
> 
> So if we are in a L1 and the user uses the local guest CID, it works, but if
> it uses the HOST_CID, the packet will go to the L0.
> 
> If we are in L0, it could be simple, because we can simply check if G2H
> is not loaded, so any packet to CID_HOST, is host loopback.
> 
> I think that if the user uses the IOCTL_VM_SOCKETS_GET_LOCAL_CID, to set
> the dest CID for the loopback, it works in both cases because we return the
> HOST_CID in L0, and always the guest CID in L1, also if a H2G is loaded to
> handle the L2.
> 
> Maybe we should document this in the man page.

Yeah, it seems like a good idea to flesh out the routing behavior for nested
VMs in the man page.
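
For reference, user space can already query the local CID (handy as a loopback
destination, per the suggestion above) through IOCTL_VM_SOCKETS_GET_LOCAL_CID
on /dev/vsock; a minimal sketch:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
        unsigned int cid;
        int fd = open("/dev/vsock", O_RDONLY);

        if (fd < 0 || ioctl(fd, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid) < 0) {
                perror("IOCTL_VM_SOCKETS_GET_LOCAL_CID");
                return 1;
        }
        printf("local CID: %u\n", cid); /* usable as svm_cid for loopback */
        close(fd);
        return 0;
}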

> 
> But I have a question: Does vmci support the host loopback?
> I've tried, and it seems not.

Only for datagrams - not for stream sockets.
 
> Also vhost-vsock doesn't support it, but virtio-vsock does.
> 
> > >
> > > Since several people asked about this feature at the KVM Forum, I would
> like
> > > to add a new VMADDR_CID_LOCAL (i.e. using the reserved 1) and
> implement
> > > loopback in the core.
> > >
> > > What do you think?
> >
> > What kind of use cases are mentioned in the KVM forum for loopback?
> One concern
> > is that we have to maintain yet another interprocess communication
> mechanism,
> > even though other choices exist already  (and those are likely to be more
> efficient
> > given the development time and specific focus that went into those). To
> me, the
> > local connections are mainly useful as a way to sanity test the protocol and
> transports.
> > However, if loopback is compelling, it would make sense have it in the core,
> since it
> > shouldn't need a specific transport.
> 
> The common use cases is for developer point of view, and to test the
> protocol and transports as you said.
> 
> People that are introducing VSOCK support in their projects, would like to
> test them on their own PC without starting a VM.
> 
> The idea is to move the code to handle loopback from the virtio-vsock,
> in the core, but in another series :-)

OK, that makes sense.

Thanks,
Jorgen
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 12/14] vsock/vmci: register vmci_transport only when VMCI guest/host are active

2019-11-12 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Monday, November 11, 2019 6:31 PM
> On Mon, Nov 11, 2019 at 04:27:28PM +0000, Jorgen Hansen wrote:
> > > From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> > > Sent: Wednesday, October 23, 2019 11:56 AM
> > >
> > > To allow other transports to be loaded with vmci_transport,
> > > we register the vmci_transport as G2H or H2G only when a VMCI guest
> > > or host is active.
> > >
> > > To do that, this patch adds a callback registered in the vmci driver
> > > that will be called when a new host or guest become active.
> > > This callback will register the vmci_transport in the VSOCK core.
> > > If the transport is already registered, we ignore the error coming
> > > from vsock_core_register().
> >
> > So today this is mainly an issue for the VMCI vsock transport, because
> > VMCI autoloads with vsock (and with this solution it can continue to
> > do that, so none of our old products break due to changed behavior,
> > which is great).
> 
> I tried to not break anything :-)
> 
> >  Shouldn't vhost behave similar, so that any module
> > that registers a h2g transport only does so if it is in active use?
> >
> 
> The vhost-vsock module will load when the first hypervisor open
> /dev/vhost-vsock, so in theory, when there's at least one active user.

Ok, sounds good then. 

> 
> >
> > > --- a/drivers/misc/vmw_vmci/vmci_host.c
> > > +++ b/drivers/misc/vmw_vmci/vmci_host.c
> > > @@ -108,6 +108,11 @@ bool vmci_host_code_active(void)
> > >atomic_read(_host_active_users) > 0);
> > >  }
> > >
> > > +int vmci_host_users(void)
> > > +{
> > > + return atomic_read(_host_active_users);
> > > +}
> > > +
> > >  /*
> > >   * Called on open of /dev/vmci.
> > >   */
> > > @@ -338,6 +343,8 @@ static int vmci_host_do_init_context(struct
> > > vmci_host_dev *vmci_host_dev,
> > >   vmci_host_dev->ct_type = VMCIOBJ_CONTEXT;
> > >   atomic_inc(_host_active_users);
> > >
> > > + vmci_call_vsock_callback(true);
> > > +
> >
> > Since we don't unregister the transport if user count drops back to 0, we
> could
> > just call this the first time, a VM is powered on after the module is 
> > loaded.
> 
> Yes, make sense. can I use the 'vmci_host_active_users' or is better to
> add a new 'vmci_host_vsock_loaded'?
> 
> My doubt is that vmci_host_active_users can return to 0, so when it returns
> to 1, we call vmci_call_vsock_callback() again.

vmci_host_active_users can drop to 0 and then increase again, so having a flag
indicating whether the callback has been invoked would ensure that it is only
called once.
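
Something like a one-shot helper would do it (sketch; the names are
hypothetical and locking/ordering details are omitted):

/* Invoke vmci_call_vsock_callback(true) only for the first host user,
 * independent of vmci_host_active_users later dropping back to 0.
 */
static atomic_t vmci_host_vsock_notified = ATOMIC_INIT(0);

static void vmci_host_notify_vsock_once(void)
{
        if (atomic_cmpxchg(&vmci_host_vsock_notified, 0, 1) == 0)
                vmci_call_vsock_callback(true);
}

vmci_host_do_init_context() could then call vmci_host_notify_vsock_once()
right after the atomic_inc() instead of invoking the callback unconditionally.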

Thanks,
Jorgen


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 11/14] vsock: add multi-transports support

2019-11-12 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Monday, November 11, 2019 6:18 PM
> To: Jorgen Hansen 
> Subject: Re: [PATCH net-next 11/14] vsock: add multi-transports support
> 
> On Mon, Nov 11, 2019 at 01:53:39PM +0000, Jorgen Hansen wrote:
> > > From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> > > Sent: Wednesday, October 23, 2019 11:56 AM
> >
> > Thanks a lot for working on this!
> >
> 
> Thanks to you for the reviews!
> 
> > > With the multi-transports support, we can use vsock with nested VMs
> (using
> > > also different hypervisors) loading both guest->host and
> > > host->guest transports at the same time.
> > >
> > > Major changes:
> > > - vsock core module can be loaded regardless of the transports
> > > - vsock_core_init() and vsock_core_exit() are renamed to
> > >   vsock_core_register() and vsock_core_unregister()
> > > - vsock_core_register() has a feature parameter (H2G, G2H, DGRAM)
> > >   to identify which directions the transport can handle and if it's
> > >   support DGRAM (only vmci)
> > > - each stream socket is assigned to a transport when the remote CID
> > >   is set (during the connect() or when we receive a connection request
> > >   on a listener socket).
> >
> > How about allowing the transport to be set during bind as well? That
> > would allow an application to ensure that it is using a specific transport,
> > i.e., if it binds to the host CID, it will use H2G, and if it binds to 
> > something
> > else it will use G2H? You can still use VMADDR_CID_ANY if you want to
> > initially listen to both transports.
> 
> Do you mean for socket that will call the connect()?

I was just thinking that in general we know the transport at that point, so
we could ensure that the socket would only see traffic from the relevant
transport, but as you mention below, the updated bind lookup and the added
checks when selecting the transport should also take care of this, so that
is fine.
 
> For listener socket the "[PATCH net-next 14/14] vsock: fix bind() behaviour
> taking care of CID" provides this behaviour.
> Since the listener sockets don't use any transport specific callback
> (they don't send any data to the remote peer), but they are used as
> placeholder,
> we don't need to assign them to a transport.
> 
> >
> >
> > >   The remote CID is used to decide which transport to use:
> > >   - remote CID > VMADDR_CID_HOST will use host->guest transport
> > >   - remote CID <= VMADDR_CID_HOST will use guest->host transport
> > > - listener sockets are not bound to any transports since no transport
> > >   operations are done on it. In this way we can create a listener
> > >   socket, also if the transports are not loaded or with VMADDR_CID_ANY
> > >   to listen on all transports.
> > > - DGRAM sockets are handled as before, since only the vmci_transport
> > >   provides this feature.
> > >
> > > Signed-off-by: Stefano Garzarella 
> > > ---
> > > RFC -> v1:
> > > - documented VSOCK_TRANSPORT_F_* flags
> > > - fixed vsock_assign_transport() when the socket is already assigned
> > >   (e.g connection failed)
> > > - moved features outside of struct vsock_transport, and used as
> > >   parameter of vsock_core_register()
> > > ---
> > >  drivers/vhost/vsock.c   |   5 +-
> > >  include/net/af_vsock.h  |  17 +-
> > >  net/vmw_vsock/af_vsock.c| 237 ++--
> > >  net/vmw_vsock/hyperv_transport.c|  26 ++-
> > >  net/vmw_vsock/virtio_transport.c|   7 +-
> > >  net/vmw_vsock/virtio_transport_common.c |  28 ++-
> > >  net/vmw_vsock/vmci_transport.c  |  31 +++-
> > >  7 files changed, 270 insertions(+), 81 deletions(-)
> > >
> >
> >
> > > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index
> > > d89381166028..85d9a147 100644
> > > --- a/net/vmw_vsock/af_vsock.c
> > > +++ b/net/vmw_vsock/af_vsock.c
> > > @@ -130,7 +130,12 @@ static struct proto vsock_proto = {  #define
> > > VSOCK_DEFAULT_BUFFER_MAX_SIZE (1024 * 256)  #define
> > > VSOCK_DEFAULT_BUFFER_MIN_SIZE 128
> > >
> > > -static const struct vsock_transport *transport_single;
> > > +/* Transport used for host->guest communication */ static const struct
> > > +vsock_transport *transport_h2g;
> > > +/* Transport used for guest->host communi

RE: [PATCH net-next 14/14] vsock: fix bind() behaviour taking care of CID

2019-11-11 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM
> When we are looking for a socket bound to a specific address,
> we also have to take into account the CID.
> 
> This patch is useful with multi-transports support because it
> allows the binding of the same port with different CID, and
> it prevents a connection to a wrong socket bound to the same
> port, but with different CID.
> 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Stefano Garzarella 
> ---
>  net/vmw_vsock/af_vsock.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)

Reviewed-by: Jorgen Hansen 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 13/14] vsock: prevent transport modules unloading

2019-11-11 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM

> This patch adds 'module' member in the 'struct vsock_transport'
> in order to get/put the transport module. This prevents the
> module unloading while sockets are assigned to it.
> 
> We increase the module refcnt when a socket is assigned to a
> transport, and we decrease the module refcnt when the socket
> is destructed.
> 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Stefano Garzarella 
> ---
> RFC -> v1:
> - fixed typo 's/tranport/transport/' in a comment (Stefan)
> ---
>  drivers/vhost/vsock.c|  2 ++
>  include/net/af_vsock.h   |  2 ++
>  net/vmw_vsock/af_vsock.c | 20 
>  net/vmw_vsock/hyperv_transport.c |  2 ++
>  net/vmw_vsock/virtio_transport.c |  2 ++
>  net/vmw_vsock/vmci_transport.c   |  1 +
>  6 files changed, 25 insertions(+), 4 deletions(-)

Reviewed-by: Jorgen Hansen 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 12/14] vsock/vmci: register vmci_transport only when VMCI guest/host are active

2019-11-11 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM
> 
> To allow other transports to be loaded with vmci_transport,
> we register the vmci_transport as G2H or H2G only when a VMCI guest
> or host is active.
> 
> To do that, this patch adds a callback registered in the vmci driver
> that will be called when a new host or guest become active.
> This callback will register the vmci_transport in the VSOCK core.
> If the transport is already registered, we ignore the error coming
> from vsock_core_register().

So today this is mainly an issue for the VMCI vsock transport, because
VMCI autoloads with vsock (and with this solution it can continue to
do that, so none of our old products break due to changed behavior,
which is great). Shouldn't vhost behave similarly, so that any module
that registers an H2G transport only does so if it is in active use?


> --- a/drivers/misc/vmw_vmci/vmci_host.c
> +++ b/drivers/misc/vmw_vmci/vmci_host.c
> @@ -108,6 +108,11 @@ bool vmci_host_code_active(void)
>atomic_read(_host_active_users) > 0);
>  }
> 
> +int vmci_host_users(void)
> +{
> + return atomic_read(_host_active_users);
> +}
> +
>  /*
>   * Called on open of /dev/vmci.
>   */
> @@ -338,6 +343,8 @@ static int vmci_host_do_init_context(struct
> vmci_host_dev *vmci_host_dev,
>   vmci_host_dev->ct_type = VMCIOBJ_CONTEXT;
>   atomic_inc(_host_active_users);
> 
> + vmci_call_vsock_callback(true);
> +

Since we don't unregister the transport if the user count drops back to 0, we
could just call this the first time a VM is powered on after the module is
loaded.

>   retval = 0;
> 
>  out:

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 11/14] vsock: add multi-transports support

2019-11-11 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM

Thanks a lot for working on this!

> With the multi-transports support, we can use vsock with nested VMs (using
> also different hypervisors) loading both guest->host and
> host->guest transports at the same time.
> 
> Major changes:
> - vsock core module can be loaded regardless of the transports
> - vsock_core_init() and vsock_core_exit() are renamed to
>   vsock_core_register() and vsock_core_unregister()
> - vsock_core_register() has a feature parameter (H2G, G2H, DGRAM)
>   to identify which directions the transport can handle and if it's
>   support DGRAM (only vmci)
> - each stream socket is assigned to a transport when the remote CID
>   is set (during the connect() or when we receive a connection request
>   on a listener socket).

How about allowing the transport to be set during bind as well? That
would allow an application to ensure that it is using a specific transport,
i.e., if it binds to the host CID, it will use H2G, and if it binds to something
else it will use G2H? You can still use VMADDR_CID_ANY if you want to
initially listen to both transports.
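
Under that scheme, a listener that only wants host-side (H2G) traffic would
bind to VMADDR_CID_HOST rather than VMADDR_CID_ANY; a minimal user-space
sketch (the port number is arbitrary):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

int main(void)
{
        struct sockaddr_vm addr;
        int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

        memset(&addr, 0, sizeof(addr));
        addr.svm_family = AF_VSOCK;
        addr.svm_cid = VMADDR_CID_HOST; /* or VMADDR_CID_ANY for all transports */
        addr.svm_port = 1234;           /* arbitrary example port */

        if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
            listen(fd, 1) < 0) {
                perror("vsock listen");
                return 1;
        }
        /* accept() would then only see connections routed via that CID. */
        return 0;
}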


>   The remote CID is used to decide which transport to use:
>   - remote CID > VMADDR_CID_HOST will use host->guest transport
>   - remote CID <= VMADDR_CID_HOST will use guest->host transport
> - listener sockets are not bound to any transports since no transport
>   operations are done on it. In this way we can create a listener
>   socket, also if the transports are not loaded or with VMADDR_CID_ANY
>   to listen on all transports.
> - DGRAM sockets are handled as before, since only the vmci_transport
>   provides this feature.
> 
> Signed-off-by: Stefano Garzarella 
> ---
> RFC -> v1:
> - documented VSOCK_TRANSPORT_F_* flags
> - fixed vsock_assign_transport() when the socket is already assigned
>   (e.g connection failed)
> - moved features outside of struct vsock_transport, and used as
>   parameter of vsock_core_register()
> ---
>  drivers/vhost/vsock.c   |   5 +-
>  include/net/af_vsock.h  |  17 +-
>  net/vmw_vsock/af_vsock.c| 237 ++--
>  net/vmw_vsock/hyperv_transport.c|  26 ++-
>  net/vmw_vsock/virtio_transport.c|   7 +-
>  net/vmw_vsock/virtio_transport_common.c |  28 ++-
>  net/vmw_vsock/vmci_transport.c  |  31 +++-
>  7 files changed, 270 insertions(+), 81 deletions(-)
> 


> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index
> d89381166028..85d9a147 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -130,7 +130,12 @@ static struct proto vsock_proto = {  #define
> VSOCK_DEFAULT_BUFFER_MAX_SIZE (1024 * 256)  #define
> VSOCK_DEFAULT_BUFFER_MIN_SIZE 128
> 
> -static const struct vsock_transport *transport_single;
> +/* Transport used for host->guest communication */ static const struct
> +vsock_transport *transport_h2g;
> +/* Transport used for guest->host communication */ static const struct
> +vsock_transport *transport_g2h;
> +/* Transport used for DGRAM communication */ static const struct
> +vsock_transport *transport_dgram;
>  static DEFINE_MUTEX(vsock_register_mutex);
> 
>  / UTILS /
> @@ -182,7 +187,7 @@ static int vsock_auto_bind(struct vsock_sock *vsk)
>   return __vsock_bind(sk, _addr);
>  }
> 
> -static int __init vsock_init_tables(void)
> +static void vsock_init_tables(void)
>  {
>   int i;
> 
> @@ -191,7 +196,6 @@ static int __init vsock_init_tables(void)
> 
>   for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
>   INIT_LIST_HEAD(_connected_table[i]);
> - return 0;
>  }
> 
>  static void __vsock_insert_bound(struct list_head *list, @@ -376,6 +380,62
> @@ void vsock_enqueue_accept(struct sock *listener, struct sock
> *connected)  }  EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
> 
> +/* Assign a transport to a socket and call the .init transport callback.
> + *
> + * Note: for stream socket this must be called when vsk->remote_addr is
> +set
> + * (e.g. during the connect() or when a connection request on a
> +listener
> + * socket is received).
> + * The vsk->remote_addr is used to decide which transport to use:
> + *  - remote CID > VMADDR_CID_HOST will use host->guest transport
> + *  - remote CID <= VMADDR_CID_HOST will use guest->host transport  */
> +int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock
> +*psk) {
> + const struct vsock_transport *new_transport;
> + struct sock *sk = sk_vsock(vsk);
> +
> + switch (sk->sk_type) {
> + case SOCK_DGRAM:
> + new_transport = transport_dgram;
> + break;
> + case SOCK_STREAM:
> + if (vsk->remote_addr.svm_cid > VMADDR_CID_HOST)
> + new_transport = transport_h2g;
> + else
> + new_transport = transport_g2h;
> + break;

You already mentioned that you are working on 

RE: [PATCH net-next 11/14] vsock: add multi-transports support

2019-10-30 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> > +/* Assign a transport to a socket and call the .init transport callback.
> > + *
> > + * Note: for stream socket this must be called when vsk->remote_addr
> > +is set
> > + * (e.g. during the connect() or when a connection request on a
> > +listener
> > + * socket is received).
> > + * The vsk->remote_addr is used to decide which transport to use:
> > + *  - remote CID > VMADDR_CID_HOST will use host->guest transport
> > + *  - remote CID <= VMADDR_CID_HOST will use guest->host transport
> > +*/ int vsock_assign_transport(struct vsock_sock *vsk, struct
> > +vsock_sock *psk) {
> > +   const struct vsock_transport *new_transport;
> > +   struct sock *sk = sk_vsock(vsk);
> > +
> > +   switch (sk->sk_type) {
> > +   case SOCK_DGRAM:
> > +   new_transport = transport_dgram;
> > +   break;
> > +   case SOCK_STREAM:
> > +   if (vsk->remote_addr.svm_cid > VMADDR_CID_HOST)
> > +   new_transport = transport_h2g;
> > +   else
> > +   new_transport = transport_g2h;
> 
> I just noticed that this break the loopback in the guest.
> As a fix, we should use 'transport_g2h' when remote_cid <=
> VMADDR_CID_HOST or remote_cid is the id of 'transport_g2h'.
> 
> To do that we also need to avoid that L2 guests can have the same CID of L1.
> For vhost_vsock I can call vsock_find_cid() in vhost_vsock_set_cid()
> 
> @Jorgen: for vmci we need to do the same? or it is guaranteed, since it's
> already support nested VMs, that a L2 guests cannot have the same CID as
> the L1.

As far as I can tell, we have the same issue with the current support for
nested VMs in VMCI. If we have an L2 guest with the same CID as the L1 guest,
we will always send to the L2 guest, and we may assign an L2 guest the same
CID as L1. It should be straightforward to avoid this, though.
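
A minimal sketch of the selection rule Stefano proposes above (illustrative
only: the helper name is invented, and the fallback when no guest->host
transport is registered is an assumption):

#include <net/af_vsock.h>

/* Sketch, not upstream code: route CIDs at or below VMADDR_CID_HOST, and our
 * own guest CID (loopback), to the guest->host transport; treat everything
 * else as a nested guest reachable through the host->guest transport.
 */
static const struct vsock_transport *
example_pick_stream_transport(u32 remote_cid)
{
	u32 g2h_cid = transport_g2h ? transport_g2h->get_local_cid() :
				      VMADDR_CID_ANY;

	if (remote_cid <= VMADDR_CID_HOST || remote_cid == g2h_cid)
		return transport_g2h;

	return transport_h2g;
}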

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 09/14] vsock: move vsock_insert_unbound() in the vsock_create()

2019-10-30 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM
> Subject: [PATCH net-next 09/14] vsock: move vsock_insert_unbound() in the
> vsock_create()
> 
> vsock_insert_unbound() was called only when 'sock' parameter of
> __vsock_create() was not null. This only happened when
> __vsock_create() was called by vsock_create().
> 
> In order to simplify the multi-transports support, this patch moves
> vsock_insert_unbound() at the end of vsock_create().
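
A rough sketch of the flow this produces, with protocol validation and the
socket-ops selection elided (simplified illustration based on the description
above, not the exact patch):

static int vsock_create(struct net *net, struct socket *sock,
			int protocol, int kern)
{
	struct sock *sk;

	/* ... protocol checks and sock->ops assignment elided ... */

	sk = __vsock_create(net, sock, NULL, GFP_KERNEL, sock->type, kern);
	if (!sk)
		return -ENOMEM;

	/* Previously done inside __vsock_create(); now done once, here. */
	vsock_insert_unbound(vsock_sk(sk));

	return 0;
}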
> 
> Reviewed-by: Dexuan Cui 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Stefano Garzarella 
> ---
>  net/vmw_vsock/af_vsock.c | 13 +

Reviewed-by: Jorgen Hansen 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 08/14] vsock: add vsock_create_connected() called by transports

2019-10-30 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM
> Subject: [PATCH net-next 08/14] vsock: add vsock_create_connected() called
> by transports
> 
> All transports call __vsock_create() with the same parameters,
> most of them depending on the parent socket. In order to simplify
> the VSOCK core APIs exposed to the transports, this patch adds
> the vsock_create_connected() callable from transports to create
> a new socket when a connection request is received.
> We also unexported the __vsock_create().
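
A plausible shape for the new helper, based on the description above (a
sketch; the exact body may differ from the applied patch):

struct sock *vsock_create_connected(struct sock *parent)
{
	return __vsock_create(sock_net(parent), NULL, parent, GFP_KERNEL,
			      parent->sk_type, 0);
}
EXPORT_SYMBOL_GPL(vsock_create_connected);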
> 
> Suggested-by: Stefan Hajnoczi 
> Signed-off-by: Stefano Garzarella 
> ---
>  include/net/af_vsock.h  |  5 +
>  net/vmw_vsock/af_vsock.c| 20 +---
>  net/vmw_vsock/hyperv_transport.c|  3 +--
>  net/vmw_vsock/virtio_transport_common.c |  3 +--
>  net/vmw_vsock/vmci_transport.c  |  3 +--
>  5 files changed, 17 insertions(+), 17 deletions(-)

Reviewed-by: Jorgen Hansen 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 07/14] vsock: handle buffer_size sockopts in the core

2019-10-30 Thread Jorgen Hansen via Virtualization
> 	return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
>  }
> 
> -static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk) -{
> - return vmci_trans(vsk)->queue_pair_size;
> -}
> -
> -static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk) -{
> - return vmci_trans(vsk)->queue_pair_min_size;
> -}
> -
> -static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk) -{
> - return vmci_trans(vsk)->queue_pair_max_size;
> -}
> -
> -static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
> -{
> - if (val < vmci_trans(vsk)->queue_pair_min_size)
> - vmci_trans(vsk)->queue_pair_min_size = val;
> - if (val > vmci_trans(vsk)->queue_pair_max_size)
> - vmci_trans(vsk)->queue_pair_max_size = val;
> - vmci_trans(vsk)->queue_pair_size = val;
> -}
> -
> -static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
> -u64 val)
> -{
> - if (val > vmci_trans(vsk)->queue_pair_size)
> - vmci_trans(vsk)->queue_pair_size = val;
> - vmci_trans(vsk)->queue_pair_min_size = val;
> -}
> -
> -static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
> -u64 val)
> -{
> - if (val < vmci_trans(vsk)->queue_pair_size)
> - vmci_trans(vsk)->queue_pair_size = val;
> - vmci_trans(vsk)->queue_pair_max_size = val;
> -}
> -
>  static int vmci_transport_notify_poll_in(
>   struct vsock_sock *vsk,
>   size_t target,
> @@ -2098,12 +2036,6 @@ static const struct vsock_transport vmci_transport = {
> 	.notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
> 	.notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
> 	.shutdown = vmci_transport_shutdown,
> -	.set_buffer_size = vmci_transport_set_buffer_size,
> -	.set_min_buffer_size = vmci_transport_set_min_buffer_size,
> -	.set_max_buffer_size = vmci_transport_set_max_buffer_size,
> -	.get_buffer_size = vmci_transport_get_buffer_size,
> -	.get_min_buffer_size = vmci_transport_get_min_buffer_size,
> -	.get_max_buffer_size = vmci_transport_get_max_buffer_size,
> 	.get_local_cid = vmci_transport_get_local_cid,
>  };
> 
> diff --git a/net/vmw_vsock/vmci_transport.h
> b/net/vmw_vsock/vmci_transport.h index 1ca1e8640b31..b7b072194282
> 100644
> --- a/net/vmw_vsock/vmci_transport.h
> +++ b/net/vmw_vsock/vmci_transport.h
> @@ -108,9 +108,6 @@ struct vmci_transport {
>   struct vmci_qp *qpair;
>   u64 produce_size;
>   u64 consume_size;
> - u64 queue_pair_size;
> - u64 queue_pair_min_size;
> - u64 queue_pair_max_size;
>   u32 detach_sub_id;
>   union vmci_transport_notify notify;
>   const struct vmci_transport_notify_ops *notify_ops;
> --
> 2.21.0

Reviewed-by: Jorgen Hansen 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 06/14] vsock: add 'struct vsock_sock *' param to vsock_core_get_transport()

2019-10-30 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM
> Subject: [PATCH net-next 06/14] vsock: add 'struct vsock_sock *' param to
> vsock_core_get_transport()
> 
> Since now the 'struct vsock_sock' object contains a pointer to the transport,
> this patch adds a parameter to the
> vsock_core_get_transport() to return the right transport assigned to the
> socket.
> 
> This patch modifies also the virtio_transport_get_ops(), that uses the
> vsock_core_get_transport(), adding the 'struct vsock_sock *' parameter.
> 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Stefano Garzarella 
> ---
> RFC -> v1:
> - Removed comment about protecting transport_single (Stefan)
> ---
>  include/net/af_vsock.h  | 2 +-
>  net/vmw_vsock/af_vsock.c| 7 ++-
>  net/vmw_vsock/virtio_transport_common.c | 9 +
>  3 files changed, 8 insertions(+), 10 deletions(-)
> 
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index
> a5e1e134261d..2ca67d048de4 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -166,7 +166,7 @@ static inline int vsock_core_init(const struct vsock_transport *t)
>  void vsock_core_exit(void);
> 
>  /* The transport may downcast this to access transport-specific functions */
> -const struct vsock_transport *vsock_core_get_transport(void);
> +const struct vsock_transport *vsock_core_get_transport(struct
> +vsock_sock *vsk);
> 
>  /**** UTILS ****/
> 
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index
> c3a14f853eb0..eaea159006c8 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -2001,12 +2001,9 @@ void vsock_core_exit(void)
>  }
> EXPORT_SYMBOL_GPL(vsock_core_exit);
> 
> -const struct vsock_transport *vsock_core_get_transport(void)
> +const struct vsock_transport *vsock_core_get_transport(struct
> +vsock_sock *vsk)
>  {
> - /* vsock_register_mutex not taken since only the transport uses this
> -  * function and only while registered.
> -  */
> - return transport_single;
> + return vsk->transport;
>  }
>  EXPORT_SYMBOL_GPL(vsock_core_get_transport);
> 
> diff --git a/net/vmw_vsock/virtio_transport_common.c
> b/net/vmw_vsock/virtio_transport_common.c
> index 9763394f7a61..37a1c7e7c7fe 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -29,9 +29,10 @@
>  /* Threshold for detecting small packets to copy */  #define
> GOOD_COPY_LEN  128
> 
> -static const struct virtio_transport *virtio_transport_get_ops(void)
> +static const struct virtio_transport *
> +virtio_transport_get_ops(struct vsock_sock *vsk)
>  {
> - const struct vsock_transport *t = vsock_core_get_transport();
> + const struct vsock_transport *t = vsock_core_get_transport(vsk);
> 
> 	return container_of(t, struct virtio_transport, transport);
>  }
> @@ -168,7 +169,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>   struct virtio_vsock_pkt *pkt;
>   u32 pkt_len = info->pkt_len;
> 
> - src_cid = virtio_transport_get_ops()->transport.get_local_cid();
> + src_cid = virtio_transport_get_ops(vsk)->transport.get_local_cid();
>   src_port = vsk->local_addr.svm_port;
>   if (!info->remote_cid) {
>   dst_cid = vsk->remote_addr.svm_cid;
> @@ -201,7 +202,7 @@ static int virtio_transport_send_pkt_info(struct
> vsock_sock *vsk,
> 
>   virtio_transport_inc_tx_pkt(vvs, pkt);
> 
> - return virtio_transport_get_ops()->send_pkt(pkt);
> + return virtio_transport_get_ops(vsk)->send_pkt(pkt);
>  }
> 
>  static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
> --
> 2.21.0

Reviewed-by: Jorgen Hansen 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 04/14] vsock: add 'transport' member in the struct vsock_sock

2019-10-30 Thread Jorgen Hansen via Virtualization
> 
>   sk = sock->sk;
>   vsk = vsock_sk(sk);
> + transport = vsk->transport;
>   total_written = 0;
>   err = 0;
> 
> @@ -1648,6 +1666,7 @@ vsock_stream_recvmsg(struct socket *sock, struct
> msghdr *msg, size_t len,  {
>   struct sock *sk;
>   struct vsock_sock *vsk;
> + const struct vsock_transport *transport;
>   int err;
>   size_t target;
>   ssize_t copied;
> @@ -1658,6 +1677,7 @@ vsock_stream_recvmsg(struct socket *sock, struct
> msghdr *msg, size_t len,
> 
>   sk = sock->sk;
>   vsk = vsock_sk(sk);
> + transport = vsk->transport;
>   err = 0;
> 
>   lock_sock(sk);
> @@ -1872,7 +1892,7 @@ static long vsock_dev_do_ioctl(struct file *filp,
> 
>   switch (cmd) {
>   case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
> - if (put_user(transport->get_local_cid(), p) != 0)
> + if (put_user(transport_single->get_local_cid(), p) != 0)
>   retval = -EFAULT;
>   break;
> 
> @@ -1919,7 +1939,7 @@ int __vsock_core_init(const struct vsock_transport
> *t, struct module *owner)
>   if (err)
>   return err;
> 
> - if (transport) {
> + if (transport_single) {
>   err = -EBUSY;
>   goto err_busy;
>   }
> @@ -1928,7 +1948,7 @@ int __vsock_core_init(const struct vsock_transport
> *t, struct module *owner)
>* unload while there are open sockets.
>*/
>   vsock_proto.owner = owner;
> - transport = t;
> + transport_single = t;
> 
>   vsock_device.minor = MISC_DYNAMIC_MINOR;
>   err = misc_register(&vsock_device);
> @@ -1958,7 +1978,7 @@ int __vsock_core_init(const struct vsock_transport
> *t, struct module *owner)
>  err_deregister_misc:
>   misc_deregister(&vsock_device);
>  err_reset_transport:
> - transport = NULL;
> + transport_single = NULL;
>  err_busy:
>   mutex_unlock(&vsock_register_mutex);
>   return err;
> @@ -1975,7 +1995,7 @@ void vsock_core_exit(void)
> 
>   /* We do not want the assignment below re-ordered. */
>   mb();
> - transport = NULL;
> + transport_single = NULL;
> 
>   mutex_unlock(&vsock_register_mutex);
>  }
> @@ -1986,7 +2006,7 @@ const struct vsock_transport
> *vsock_core_get_transport(void)
>   /* vsock_register_mutex not taken since only the transport uses this
>* function and only while registered.
>*/
> - return transport;
> + return transport_single;
>  }
>  EXPORT_SYMBOL_GPL(vsock_core_get_transport);
> 
> --
> 2.21.0

Reviewed-by: Jorgen Hansen 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 02/14] vsock: remove vm_sockets_get_local_cid()

2019-10-30 Thread Jorgen Hansen via Virtualization
> -Original Message-
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM
> To: net...@vger.kernel.org
> Subject: [PATCH net-next 02/14] vsock: remove vm_sockets_get_local_cid()
> 
> vm_sockets_get_local_cid() is only used in virtio_transport_common.c.
> We can replace it calling the virtio_transport_get_ops() and using the
> get_local_cid() callback registered by the transport.
> 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Stefano Garzarella 
> ---
>  include/linux/vm_sockets.h  |  2 --
>  net/vmw_vsock/af_vsock.c| 10 --
>  net/vmw_vsock/virtio_transport_common.c |  2 +-
>  3 files changed, 1 insertion(+), 13 deletions(-)
> 
> diff --git a/include/linux/vm_sockets.h b/include/linux/vm_sockets.h index
> 33f1a2ecd905..7dd899ccb920 100644
> --- a/include/linux/vm_sockets.h
> +++ b/include/linux/vm_sockets.h
> @@ -10,6 +10,4 @@
> 
>  #include <uapi/linux/vm_sockets.h>
> 
> -int vm_sockets_get_local_cid(void);
> -
>  #endif /* _VM_SOCKETS_H */
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index
> 2ab43b2bba31..2f2582fb7fdd 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -129,16 +129,6 @@ static struct proto vsock_proto = {
>  static const struct vsock_transport *transport;
>  static DEFINE_MUTEX(vsock_register_mutex);
> 
> -/**** EXPORTS ****/
> -
> -/* Get the ID of the local context.  This is transport dependent. */
> -
> -int vm_sockets_get_local_cid(void)
> -{
> - return transport->get_local_cid();
> -}
> -EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
> -
>  /**** UTILS ****/
> 
>  /* Each bound VSocket is stored in the bind hash table and each connected
> diff --git a/net/vmw_vsock/virtio_transport_common.c
> b/net/vmw_vsock/virtio_transport_common.c
> index d02c9b41a768..b1cd16ed66ea 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -168,7 +168,7 @@ static int virtio_transport_send_pkt_info(struct
> vsock_sock *vsk,
>   struct virtio_vsock_pkt *pkt;
>   u32 pkt_len = info->pkt_len;
> 
> - src_cid = vm_sockets_get_local_cid();
> + src_cid = virtio_transport_get_ops()->transport.get_local_cid();
>   src_port = vsk->local_addr.svm_port;
>   if (!info->remote_cid) {
>   dst_cid = vsk->remote_addr.svm_cid;
> --
> 2.21.0

Reviewed-by: Jorgen Hansen 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 03/14] vsock: remove include/linux/vm_sockets.h file

2019-10-30 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM
> Subject: [PATCH net-next 03/14] vsock: remove include/linux/vm_sockets.h
> file
> 
> This header file now only includes the "uapi/linux/vm_sockets.h".
> We can include directly it when needed.
> 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Stefano Garzarella 
> ---
>  include/linux/vm_sockets.h| 13 -
>  include/net/af_vsock.h|  2 +-
>  include/net/vsock_addr.h  |  2 +-
>  net/vmw_vsock/vmci_transport_notify.h |  1 -
>  4 files changed, 2 insertions(+), 16 deletions(-)  delete mode 100644
> include/linux/vm_sockets.h
> 
> diff --git a/include/linux/vm_sockets.h b/include/linux/vm_sockets.h
> deleted file mode 100644 index 7dd899ccb920..
> --- a/include/linux/vm_sockets.h
> +++ /dev/null
> @@ -1,13 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0-only */
> -/*
> - * VMware vSockets Driver
> - *
> - * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
> - */
> -
> -#ifndef _VM_SOCKETS_H
> -#define _VM_SOCKETS_H
> -
> -#include <uapi/linux/vm_sockets.h>
> -
> -#endif /* _VM_SOCKETS_H */
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index
> 80ea0f93d3f7..c660402b10f2 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -10,7 +10,7 @@
> 
>  #include <linux/kernel.h>
>  #include <linux/workqueue.h>
> -#include <linux/vm_sockets.h>
> +#include <uapi/linux/vm_sockets.h>
> 
>  #include "vsock_addr.h"
> 
> diff --git a/include/net/vsock_addr.h b/include/net/vsock_addr.h index
> 57d2db5c4bdf..cf8cc140d68d 100644
> --- a/include/net/vsock_addr.h
> +++ b/include/net/vsock_addr.h
> @@ -8,7 +8,7 @@
>  #ifndef _VSOCK_ADDR_H_
>  #define _VSOCK_ADDR_H_
> 
> -#include <linux/vm_sockets.h>
> +#include <uapi/linux/vm_sockets.h>
> 
>  void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port);  int
> vsock_addr_validate(const struct sockaddr_vm *addr); diff --git
> a/net/vmw_vsock/vmci_transport_notify.h
> b/net/vmw_vsock/vmci_transport_notify.h
> index 7843f08d4290..a1aa5a998c0e 100644
> --- a/net/vmw_vsock/vmci_transport_notify.h
> +++ b/net/vmw_vsock/vmci_transport_notify.h
> @@ -11,7 +11,6 @@
>  #include <linux/types.h>
>  #include <linux/vmw_vmci_defs.h>
>  #include <linux/vmw_vmci_api.h>
> -#include <linux/vm_sockets.h>
> 
>  #include "vmci_transport.h"
> 
> --
> 2.21.0

Reviewed-by: Jorgen Hansen 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH net-next 01/14] vsock/vmci: remove unused VSOCK_DEFAULT_CONNECT_TIMEOUT

2019-10-30 Thread Jorgen Hansen via Virtualization
> From: Stefano Garzarella [mailto:sgarz...@redhat.com]
> Sent: Wednesday, October 23, 2019 11:56 AM
> Subject: [PATCH net-next 01/14] vsock/vmci: remove unused
> VSOCK_DEFAULT_CONNECT_TIMEOUT
> 
> The VSOCK_DEFAULT_CONNECT_TIMEOUT definition was introduced with
> commit d021c344051af ("VSOCK: Introduce VM Sockets"), but it is never used
> in the net/vmw_vsock/vmci_transport.c.
> 
> VSOCK_DEFAULT_CONNECT_TIMEOUT is used and defined in
> net/vmw_vsock/af_vsock.c
> 
> Cc: Jorgen Hansen 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Stefano Garzarella 
> ---
>  net/vmw_vsock/vmci_transport.c | 5 -
>  1 file changed, 5 deletions(-)
> 
> diff --git a/net/vmw_vsock/vmci_transport.c
> b/net/vmw_vsock/vmci_transport.c index 8c9c4ed90fa7..f8e3131ac480
> 100644
> --- a/net/vmw_vsock/vmci_transport.c
> +++ b/net/vmw_vsock/vmci_transport.c
> @@ -78,11 +78,6 @@ static int PROTOCOL_OVERRIDE = -1;
>  #define VMCI_TRANSPORT_DEFAULT_QP_SIZE   262144
>  #define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144
> 
> -/* The default peer timeout indicates how long we will wait for a peer
> response
> - * to a control message.
> - */
> -#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
> -
>  /* Helper function to convert from a VMCI error code to a VSock error code.
> */
> 
>  static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
> --
> 2.21.0

Reviewed-by: Jorgen Hansen 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VMCI/VSOCK: Add maintainers for VMCI, AF_VSOCK and VMCI transport

2019-03-21 Thread Jorgen Hansen via Virtualization
Update the maintainers file to include maintainers for the VMware
vmci driver, af_vsock, and the vsock vmci transport.

Signed-off-by: Jorgen Hansen 
---
 MAINTAINERS | 20 
 1 file changed, 20 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e17ebf7..b9714fc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16630,6 +16630,14 @@ S: Maintained
 F: drivers/scsi/vmw_pvscsi.c
 F: drivers/scsi/vmw_pvscsi.h
 
+VMWARE VMCI DRIVER
+M: Jorgen Hansen 
+M: Vishnu Dasa 
+M: "VMware, Inc." 
+L: linux-ker...@vger.kernel.org
+S: Maintained
+F: drivers/misc/vmw_vmci/
+
 VMWARE VMMOUSE SUBDRIVER
 M: "VMware Graphics" 
 M: "VMware, Inc." 
@@ -16638,6 +16646,18 @@ S: Maintained
 F: drivers/input/mouse/vmmouse.c
 F: drivers/input/mouse/vmmouse.h
 
+VMWARE VSOCK DRIVER (AF_VSOCK) AND VMCI TRANSPORT
+M: Jorgen Hansen 
+M: Vishnu Dasa 
+M: "VMware, Inc." 
+L: net...@vger.kernel.org
+S: Maintained
+F: net/vmw_vsock/af_vsock.c
+F: net/vmw_vsock/vmci_transport*
+F: net/vmw_vsock/vsock_addr.c
+F: include/linux/vm_sockets.h
+F: include/uapi/linux/vm_sockets.h
+
 VMWARE VMXNET3 ETHERNET DRIVER
 M: Ronak Doshi 
 M: "VMware, Inc." 
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VMCI: Verify PPNs before sending to device

2019-01-16 Thread Jorgen Hansen via Virtualization
The current version of the VMCI device only supports 32 bit PPNs,
so check whether we are truncating PPNs, and fail the operation
if we do. One such check did exist, but was wrong. Another
check was missing.

Testing through code modification: constructed PPN not representable
by 32-bit and observed that failure was reported.
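
The added checks follow a simple pattern that can be shown in isolation
(hypothetical helper for illustration, not part of the patch):

/* A PPN is only usable by the device if it keeps its value after being
 * truncated to the 32-bit field the device expects.
 */
static bool example_ppn_fits_u32(unsigned long ppn)
{
	u32 ppn32 = (u32)ppn;

	return sizeof(ppn) <= sizeof(ppn32) || ppn == ppn32;
}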

Fixes: 1f166439917b ("VMCI: guest side driver implementation.")
Fixes: 06164d2b72aa ("VMCI: queue pairs implementation.")

Signed-off-by: Jorgen Hansen 
Reviewed-by: Adit Ranadive 
Reviewed-by: Vishnu Dasa 
---
 drivers/misc/vmw_vmci/vmci_guest.c  | 10 +++---
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 10 --
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_guest.c 
b/drivers/misc/vmw_vmci/vmci_guest.c
index dad5abee656e..02bb3866cf9e 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -532,10 +532,14 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
unsigned long bitmap_ppn =
vmci_dev->notification_base >> PAGE_SHIFT;
-   if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
+   u32 bitmap_ppn32 = bitmap_ppn;
+
+   if ((sizeof(bitmap_ppn) > sizeof(bitmap_ppn32)
+&& bitmap_ppn != bitmap_ppn32) ||
+   !vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
			dev_warn(&pdev->dev,
-				 "VMCI device unable to register notification bitmap with PPN 0x%x\n",
-				 (u32) bitmap_ppn);
+				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
+				 bitmap_ppn);
error = -ENXIO;
goto err_remove_vmci_dev_g;
}
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c 
b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 264f4ed8eef2..1da4f6cb01b2 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -465,9 +465,8 @@ static int qp_alloc_ppn_set(void *prod_q,
for (i = 0; i < num_produce_pages; i++) {
unsigned long pfn;
 
-   produce_ppns[i] =
-   produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
-   pfn = produce_ppns[i];
+   pfn = produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
+   produce_ppns[i] = pfn;
 
/* Fail allocation if PFN isn't supported by hypervisor. */
if (sizeof(pfn) > sizeof(*produce_ppns)
@@ -478,9 +477,8 @@ static int qp_alloc_ppn_set(void *prod_q,
for (i = 0; i < num_consume_pages; i++) {
unsigned long pfn;
 
-   consume_ppns[i] =
-   consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
-   pfn = consume_ppns[i];
+   pfn = consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
+   consume_ppns[i] = pfn;
 
/* Fail allocation if PFN isn't supported by hypervisor. */
if (sizeof(pfn) > sizeof(*consume_ppns)
-- 
2.17.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v2] VSOCK: Send reset control packet when socket is partially bound

2018-12-19 Thread Jorgen Hansen
If a server side socket is bound to an address, but not in the listening
state yet, incoming connection requests should receive a reset control
packet in response. However, the function used to send the reset
silently drops the reset packet if the sending socket isn't bound
to a remote address (as is the case for a bound socket not yet in
the listening state). This change fixes this by using the src
of the incoming packet as destination for the reset packet in
this case.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Reviewed-by: Adit Ranadive 
Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---

v1 -> v2:
- Changed order of local variables

 net/vmw_vsock/vmci_transport.c | 67 +++---
 1 file changed, 50 insertions(+), 17 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 0ae3614..5cf8935 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -264,6 +264,31 @@ vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
 }
 
 static int
+vmci_transport_alloc_send_control_pkt(struct sockaddr_vm *src,
+ struct sockaddr_vm *dst,
+ enum vmci_transport_packet_type type,
+ u64 size,
+ u64 mode,
+ struct vmci_transport_waiting_info *wait,
+ u16 proto,
+ struct vmci_handle handle)
+{
+   struct vmci_transport_packet *pkt;
+   int err;
+
+   pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+   if (!pkt)
+   return -ENOMEM;
+
+   err = __vmci_transport_send_control_pkt(pkt, src, dst, type, size,
+   mode, wait, proto, handle,
+   true);
+   kfree(pkt);
+
+   return err;
+}
+
+static int
 vmci_transport_send_control_pkt(struct sock *sk,
enum vmci_transport_packet_type type,
u64 size,
@@ -272,9 +297,7 @@ vmci_transport_send_control_pkt(struct sock *sk,
u16 proto,
struct vmci_handle handle)
 {
-   struct vmci_transport_packet *pkt;
struct vsock_sock *vsk;
-   int err;
 
vsk = vsock_sk(sk);
 
@@ -284,17 +307,10 @@ vmci_transport_send_control_pkt(struct sock *sk,
if (!vsock_addr_bound(&vsk->remote_addr))
return -EINVAL;
 
-   pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
-   if (!pkt)
-   return -ENOMEM;
-
-   err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
-   &vsk->remote_addr, type, size,
-   mode, wait, proto, handle,
-   true);
-   kfree(pkt);
-
-   return err;
+   return vmci_transport_alloc_send_control_pkt(&vsk->local_addr,
+&vsk->remote_addr,
+type, size, mode,
+wait, proto, handle);
 }
 
 static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
@@ -312,12 +328,29 @@ static int vmci_transport_send_reset_bh(struct 
sockaddr_vm *dst,
 static int vmci_transport_send_reset(struct sock *sk,
 struct vmci_transport_packet *pkt)
 {
+   struct sockaddr_vm *dst_ptr;
+   struct sockaddr_vm dst;
+   struct vsock_sock *vsk;
+
if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
return 0;
-   return vmci_transport_send_control_pkt(sk,
-   VMCI_TRANSPORT_PACKET_TYPE_RST,
-   0, 0, NULL, VSOCK_PROTO_INVALID,
-   VMCI_INVALID_HANDLE);
+
+   vsk = vsock_sk(sk);
+
+   if (!vsock_addr_bound(&vsk->local_addr))
+   return -EINVAL;
+
+   if (vsock_addr_bound(&vsk->remote_addr)) {
+   dst_ptr = &vsk->remote_addr;
+   } else {
+   vsock_addr_init(&dst, pkt->dg.src.context,
+   pkt->src_port);
+   dst_ptr = &dst;
+   }
+   return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, dst_ptr,
+VMCI_TRANSPORT_PACKET_TYPE_RST,
+0, 0, NULL, VSOCK_PROTO_INVALID,
+VMCI_INVALID_HANDLE);
 }
 
 static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VSOCK: Send reset control packet when socket is partially bound

2018-12-13 Thread Jorgen Hansen
If a server side socket is bound to an address, but not in the listening
state yet, incoming connection requests should receive a reset control
packet in response. However, the function used to send the reset
silently drops the reset packet if the sending socket isn't bound
to a remote address (as is the case for a bound socket not yet in
the listening state). This change fixes this by using the src
of the incoming packet as destination for the reset packet in
this case.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Reviewed-by: Adit Ranadive 
Reviewed-by: Vishnu Dasa 
Signed-off-by: Jorgen Hansen 
---
 net/vmw_vsock/vmci_transport.c | 67 +++---
 1 file changed, 50 insertions(+), 17 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 0ae3614..402d84e 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -264,6 +264,31 @@ vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
 }
 
 static int
+vmci_transport_alloc_send_control_pkt(struct sockaddr_vm *src,
+ struct sockaddr_vm *dst,
+ enum vmci_transport_packet_type type,
+ u64 size,
+ u64 mode,
+ struct vmci_transport_waiting_info *wait,
+ u16 proto,
+ struct vmci_handle handle)
+{
+   struct vmci_transport_packet *pkt;
+   int err;
+
+   pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+   if (!pkt)
+   return -ENOMEM;
+
+   err = __vmci_transport_send_control_pkt(pkt, src, dst, type, size,
+   mode, wait, proto, handle,
+   true);
+   kfree(pkt);
+
+   return err;
+}
+
+static int
 vmci_transport_send_control_pkt(struct sock *sk,
enum vmci_transport_packet_type type,
u64 size,
@@ -272,9 +297,7 @@ vmci_transport_send_control_pkt(struct sock *sk,
u16 proto,
struct vmci_handle handle)
 {
-   struct vmci_transport_packet *pkt;
struct vsock_sock *vsk;
-   int err;
 
vsk = vsock_sk(sk);
 
@@ -284,17 +307,10 @@ vmci_transport_send_control_pkt(struct sock *sk,
if (!vsock_addr_bound(&vsk->remote_addr))
return -EINVAL;
 
-   pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
-   if (!pkt)
-   return -ENOMEM;
-
-   err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
-   &vsk->remote_addr, type, size,
-   mode, wait, proto, handle,
-   true);
-   kfree(pkt);
-
-   return err;
+   return vmci_transport_alloc_send_control_pkt(&vsk->local_addr,
+&vsk->remote_addr,
+type, size, mode,
+wait, proto, handle);
 }
 
 static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
@@ -312,12 +328,29 @@ static int vmci_transport_send_reset_bh(struct 
sockaddr_vm *dst,
 static int vmci_transport_send_reset(struct sock *sk,
 struct vmci_transport_packet *pkt)
 {
+   struct sockaddr_vm dst;
+   struct sockaddr_vm *dst_ptr;
+   struct vsock_sock *vsk;
+
if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
return 0;
-   return vmci_transport_send_control_pkt(sk,
-   VMCI_TRANSPORT_PACKET_TYPE_RST,
-   0, 0, NULL, VSOCK_PROTO_INVALID,
-   VMCI_INVALID_HANDLE);
+
+   vsk = vsock_sk(sk);
+
+   if (!vsock_addr_bound(&vsk->local_addr))
+   return -EINVAL;
+
+   if (vsock_addr_bound(&vsk->remote_addr)) {
+   dst_ptr = &vsk->remote_addr;
+   } else {
+   vsock_addr_init(&dst, pkt->dg.src.context,
+   pkt->src_port);
+   dst_ptr = &dst;
+   }
+   return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, dst_ptr,
+VMCI_TRANSPORT_PACKET_TYPE_RST,
+0, 0, NULL, VSOCK_PROTO_INVALID,
+VMCI_INVALID_HANDLE);
 }
 
 static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VSOCK: Don't set sk_state to TCP_CLOSE before testing it

2017-11-27 Thread Jorgen Hansen
A recent commit (3b4477d2dcf2) converted the sk_state to use
TCP constants. In that change, vmci_transport_handle_detach
was changed such that sk->sk_state was set to TCP_CLOSE before
we test whether it is TCP_SYN_SENT. This change moves the
sk_state change back to the original locations in that function.

Signed-off-by: Jorgen Hansen <jhan...@vmware.com>
---
 net/vmw_vsock/vmci_transport.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 56573dc..a7a73ff 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -804,8 +804,6 @@ static void vmci_transport_handle_detach(struct sock *sk)
 */
if (vsk->local_addr.svm_cid == VMADDR_CID_HOST ||
vsock_stream_has_data(vsk) <= 0) {
-   sk->sk_state = TCP_CLOSE;
-
if (sk->sk_state == TCP_SYN_SENT) {
/* The peer may detach from a queue pair while
 * we are still in the connecting state, i.e.,
@@ -815,10 +813,12 @@ static void vmci_transport_handle_detach(struct sock *sk)
 * event like a reset.
 */
 
+   sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
return;
}
+   sk->sk_state = TCP_CLOSE;
}
sk->sk_state_change(sk);
}
-- 
2.6.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v2] VSOCK: Don't call vsock_stream_has_data in atomic context

2017-11-24 Thread Jorgen Hansen
When using the host personality, VMCI will grab a mutex for any
queue pair access. In the detach callback for the vmci vsock
transport, we call vsock_stream_has_data while holding a spinlock,
and vsock_stream_has_data will access a queue pair.

To avoid this, we can simply omit calling vsock_stream_has_data
for host side queue pairs, since the QPs are empty by default
when the guest has detached.

This bug affects users of VMware Workstation using kernel version
4.4 and later.

Testing: Ran vsock tests between guest and host, and verified that
with this change, the host isn't calling vsock_stream_has_data
during detach. Ran mixedTest between guest and host using both
guest and host as server.

v2: Rebased on top of recent change to sk_state values
Reviewed-by: Adit Ranadive <ad...@vmware.com>
Reviewed-by: Aditya Sarwade <asarw...@vmware.com>
Reviewed-by: Stefan Hajnoczi <stefa...@redhat.com>
Signed-off-by: Jorgen Hansen <jhan...@vmware.com>
---
 net/vmw_vsock/vmci_transport.c |   10 +++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 391775e..56573dc 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -797,9 +797,13 @@ static void vmci_transport_handle_detach(struct sock *sk)
 
/* We should not be sending anymore since the peer won't be
 * there to receive, but we can still receive if there is data
-* left in our consume queue.
+* left in our consume queue. If the local endpoint is a host,
+* we can't call vsock_stream_has_data, since that may block,
+* but a host endpoint can't read data once the VM has
+* detached, so there is no available data in that case.
 */
-   if (vsock_stream_has_data(vsk) <= 0) {
+   if (vsk->local_addr.svm_cid == VMADDR_CID_HOST ||
+   vsock_stream_has_data(vsk) <= 0) {
sk->sk_state = TCP_CLOSE;
 
if (sk->sk_state == TCP_SYN_SENT) {
@@ -2144,7 +2148,7 @@ static void __exit vmci_transport_exit(void)
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
-MODULE_VERSION("1.0.4.0-k");
+MODULE_VERSION("1.0.5.0-k");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("vmware_vsock");
 MODULE_ALIAS_NETPROTO(PF_VSOCK);
-- 
1.7.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VSOCK: Don't call vsock_stream_has_data in atomic context

2017-11-21 Thread Jorgen Hansen
When using the host personality, VMCI will grab a mutex for any
queue pair access. In the detach callback for the vmci vsock
transport, we call vsock_stream_has_data while holding a spinlock,
and vsock_stream_has_data will access a queue pair.

To avoid this, we can simply omit calling vsock_stream_has_data
for host side queue pairs, since the QPs are empty by default
when the guest has detached.

This bug affects users of VMware Workstation using kernel version
4.4 and later.

Testing: Ran vsock tests between guest and host, and verified that
with this change, the host isn't calling vsock_stream_has_data
during detach. Ran mixedTest between guest and host using both
guest and host as server.

Reviewed-by: Adit Ranadive <ad...@vmware.com>
Reviewed-by: Aditya Sarwade <asarw...@vmware.com>
Signed-off-by: Jorgen Hansen <jhan...@vmware.com>
---
 net/vmw_vsock/vmci_transport.c |   10 +++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 10ae782..90bc1a7 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -798,9 +798,13 @@ static void vmci_transport_handle_detach(struct sock *sk)
 
/* We should not be sending anymore since the peer won't be
 * there to receive, but we can still receive if there is data
-* left in our consume queue.
+* left in our consume queue. If the local endpoint is a host,
+* we can't call vsock_stream_has_data, since that may block,
+* but a host endpoint can't read data once the VM has
+* detached, so there is no available data in that case.
 */
-   if (vsock_stream_has_data(vsk) <= 0) {
+   if (vsk->local_addr.svm_cid == VMADDR_CID_HOST ||
+   vsock_stream_has_data(vsk) <= 0) {
if (sk->sk_state == SS_CONNECTING) {
/* The peer may detach from a queue pair while
 * we are still in the connecting state, i.e.,
@@ -2145,7 +2149,7 @@ static void __exit vmci_transport_exit(void)
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
-MODULE_VERSION("1.0.4.0-k");
+MODULE_VERSION("1.0.5.0-k");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("vmware_vsock");
 MODULE_ALIAS_NETPROTO(PF_VSOCK);
-- 
1.7.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VMCI: Doorbell create and destroy fixes

2016-10-06 Thread Jorgen Hansen
This change consists of two changes:

1) If vmci_doorbell_create is called when neither guest nor
   host personality has been initialized, vmci_get_context_id
   will return VMCI_INVALID_ID. In that case, we should fail
   the create call.
2) In doorbell destroy, we assume that vmci_guest_code_active()
   has the same return value on create and destroy. That may not
   be the case, so we may end up with the wrong refcount.
   Instead, destroy should check explicitly whether the doorbell
   is in the index table as an indicator of whether the guest
   code was active at create time.

Reviewed-by: Adit Ranadive <ad...@vmware.com>
Signed-off-by: Jorgen Hansen <jhan...@vmware.com>
---
 drivers/misc/vmw_vmci/vmci_doorbell.c |8 +++-
 drivers/misc/vmw_vmci/vmci_driver.c   |2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c 
b/drivers/misc/vmw_vmci/vmci_doorbell.c
index a8cee33..b3fa738 100644
--- a/drivers/misc/vmw_vmci/vmci_doorbell.c
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.c
@@ -431,6 +431,12 @@ int vmci_doorbell_create(struct vmci_handle *handle,
if (vmci_handle_is_invalid(*handle)) {
u32 context_id = vmci_get_context_id();
 
+   if (context_id == VMCI_INVALID_ID) {
+   pr_warn("Failed to get context ID\n");
+   result = VMCI_ERROR_NO_RESOURCES;
+   goto free_mem;
+   }
+
/* Let resource code allocate a free ID for us */
new_handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
} else {
@@ -525,7 +531,7 @@ int vmci_doorbell_destroy(struct vmci_handle handle)
 
entry = container_of(resource, struct dbell_entry, resource);
 
-   if (vmci_guest_code_active()) {
+   if (!hlist_unhashed(&entry->node)) {
int result;
 
dbell_index_table_remove(entry);
diff --git a/drivers/misc/vmw_vmci/vmci_driver.c 
b/drivers/misc/vmw_vmci/vmci_driver.c
index 896be15..d7eaf1e 100644
--- a/drivers/misc/vmw_vmci/vmci_driver.c
+++ b/drivers/misc/vmw_vmci/vmci_driver.c
@@ -113,5 +113,5 @@ module_exit(vmci_drv_exit);
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface.");
-MODULE_VERSION("1.1.4.0-k");
+MODULE_VERSION("1.1.5.0-k");
 MODULE_LICENSE("GPL v2");
-- 
1.7.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VSOCK: Don't dec ack backlog twice for rejected connections

2016-09-27 Thread Jorgen Hansen
If a pending socket is marked as rejected, we will decrease the
sk_ack_backlog twice. So don't decrement it for rejected sockets
in vsock_pending_work().

Testing of the rejected socket path was done through code
modifications.

Reported-by: Stefan Hajnoczi <stefa...@redhat.com>
Signed-off-by: Jorgen Hansen <jhan...@vmware.com>
Reviewed-by: Adit Ranadive <ad...@vmware.com>
Reviewed-by: Aditya Sarwade <asarw...@vmware.com>
---
 net/vmw_vsock/af_vsock.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 17dbbe6..8a398b3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -465,6 +465,8 @@ void vsock_pending_work(struct work_struct *work)
 
if (vsock_is_pending(sk)) {
vsock_remove_pending(listener, sk);
+
+   listener->sk_ack_backlog--;
} else if (!vsk->rejected) {
/* We are not on the pending list and accept() did not reject
 * us, so we must have been accepted by our user process.  We
@@ -475,8 +477,6 @@ void vsock_pending_work(struct work_struct *work)
goto out;
}
 
-   listener->sk_ack_backlog--;
-
/* We need to remove ourself from the global connected sockets list so
 * incoming packets can't find this socket, and to reduce the reference
 * count.
@@ -2010,5 +2010,5 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport);
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Socket Family");
-MODULE_VERSION("1.0.1.0-k");
+MODULE_VERSION("1.0.2.0-k");
 MODULE_LICENSE("GPL v2");
-- 
1.7.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VSOCK: Only check error on skb_recv_datagram when skb is NULL

2016-04-19 Thread Jorgen Hansen
If skb_recv_datagram returns an skb, we should ignore the err
value returned. Otherwise, datagram receives will return EAGAIN
when they have to wait for a datagram.

Acked-by: Adit Ranadive <ad...@vmware.com>
Signed-off-by: Jorgen Hansen <jhan...@vmware.com>
---
 net/vmw_vsock/vmci_transport.c |7 ++-
 1 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 662bdd2..5621473 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1735,11 +1735,8 @@ static int vmci_transport_dgram_dequeue(struct 
vsock_sock *vsk,
/* Retrieve the head sk_buff from the socket's receive queue. */
err = 0;
skb = skb_recv_datagram(>sk, flags, noblock, );
-   if (err)
-   return err;
-
if (!skb)
-   return -EAGAIN;
+   return err;
 
dg = (struct vmci_datagram *)skb->data;
if (!dg)
@@ -2154,7 +2151,7 @@ module_exit(vmci_transport_exit);
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
-MODULE_VERSION("1.0.3.0-k");
+MODULE_VERSION("1.0.4.0-k");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("vmware_vsock");
 MODULE_ALIAS_NETPROTO(PF_VSOCK);
-- 
1.7.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VSOCK: Detach QP check should filter out non matching QPs.

2016-04-05 Thread Jorgen Hansen
The check in vmci_transport_peer_detach_cb should only allow a
detach when the qp handle of the transport matches the one in
the detach message.

Testing: Before this change, a detach from a peer on a different
socket would cause an active stream socket to register a detach.

Reviewed-by: George Zhang <georgezh...@vmware.com>
Signed-off-by: Jorgen Hansen <jhan...@vmware.com>
---
 net/vmw_vsock/vmci_transport.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 0a369bb..662bdd2 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -842,7 +842,7 @@ static void vmci_transport_peer_detach_cb(u32 sub_id,
 * qp_handle.
 */
if (vmci_handle_is_invalid(e_payload->handle) ||
-   vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
+   !vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
return;
 
/* We don't ask for delayed CBs when we subscribe to this event (we
@@ -2154,7 +2154,7 @@ module_exit(vmci_transport_exit);
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
-MODULE_VERSION("1.0.2.0-k");
+MODULE_VERSION("1.0.3.0-k");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("vmware_vsock");
 MODULE_ALIAS_NETPROTO(PF_VSOCK);
-- 
1.7.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VMCI: Use 32bit atomics for queue headers on X86_32

2015-11-12 Thread Jorgen Hansen
This change restricts the reading and setting of the head and tail
pointers on 32bit X86 to 32bit for both correctness and
performance reasons. On uniprocessor X86_32, the atomic64_read
may be implemented as a non-locked cmpxchg8b. This may result in
updates to the pointers done by the VMCI device being overwritten.
On MP systems, there is no such correctness issue, but using 32bit
atomics avoids the overhead of the locked 64bit operation. All this
is safe because the queue size on 32bit systems will never exceed
a 32bit value.

Reviewed-by: Thomas Hellstrom <thellst...@vmware.com>
Signed-off-by: Jorgen Hansen <jhan...@vmware.com>
---
 drivers/misc/vmw_vmci/vmci_driver.c |2 +-
 include/linux/vmw_vmci_defs.h   |   43 +++---
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_driver.c 
b/drivers/misc/vmw_vmci/vmci_driver.c
index b823f9a..896be15 100644
--- a/drivers/misc/vmw_vmci/vmci_driver.c
+++ b/drivers/misc/vmw_vmci/vmci_driver.c
@@ -113,5 +113,5 @@ module_exit(vmci_drv_exit);
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface.");
-MODULE_VERSION("1.1.3.0-k");
+MODULE_VERSION("1.1.4.0-k");
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index 65ac54c..1bd31a3 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -734,6 +734,41 @@ static inline void *vmci_event_data_payload(struct 
vmci_event_data *ev_data)
 }
 
 /*
+ * Helper to read a value from a head or tail pointer. For X86_32, the
+ * pointer is treated as a 32bit value, since the pointer value
+ * never exceeds a 32bit value in this case. Also, doing an
+ * atomic64_read on X86_32 uniprocessor systems may be implemented
+ * as a non locked cmpxchg8b, that may end up overwriting updates done
+ * by the VMCI device to the memory location. On 32bit SMP, the lock
+ * prefix will be used, so correctness isn't an issue, but using a
+ * 64bit operation still adds unnecessary overhead.
+ */
+static inline u64 vmci_q_read_pointer(atomic64_t *var)
+{
+#if defined(CONFIG_X86_32)
+   return atomic_read((atomic_t *)var);
+#else
+   return atomic64_read(var);
+#endif
+}
+
+/*
+ * Helper to set the value of a head or tail pointer. For X86_32, the
+ * pointer is treated as a 32bit value, since the pointer value
+ * never exceeds a 32bit value in this case. On 32bit SMP, using a
+ * locked cmpxchg8b adds unnecessary overhead.
+ */
+static inline void vmci_q_set_pointer(atomic64_t *var,
+ u64 new_val)
+{
+#if defined(CONFIG_X86_32)
+   return atomic_set((atomic_t *)var, (u32)new_val);
+#else
+   return atomic64_set(var, new_val);
+#endif
+}
+
+/*
  * Helper to add a given offset to a head or tail pointer. Wraps the
  * value of the pointer around the max size of the queue.
  */
@@ -741,14 +776,14 @@ static inline void vmci_qp_add_pointer(atomic64_t *var,
   size_t add,
   u64 size)
 {
-   u64 new_val = atomic64_read(var);
+   u64 new_val = vmci_q_read_pointer(var);
 
if (new_val >= size - add)
new_val -= size;
 
new_val += add;
 
-   atomic64_set(var, new_val);
+   vmci_q_set_pointer(var, new_val);
 }
 
 /*
@@ -758,7 +793,7 @@ static inline u64
 vmci_q_header_producer_tail(const struct vmci_queue_header *q_header)
 {
struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
-   return atomic64_read(&qh->producer_tail);
+   return vmci_q_read_pointer(&qh->producer_tail);
 }
 
 /*
@@ -768,7 +803,7 @@ static inline u64
 vmci_q_header_consumer_head(const struct vmci_queue_header *q_header)
 {
struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
-   return atomic64_read(&qh->consumer_head);
+   return vmci_q_read_pointer(&qh->consumer_head);
 }
 
 /*
-- 
1.7.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] VSOCK: sock_put wasn't safe to call in interrupt context

2015-10-21 Thread Jorgen Hansen
In the vsock vmci_transport driver, sock_put wasn't safe to call
in interrupt context, since that may call the vsock destructor
which in turn calls several functions that should only be called
from process context. This change defers the calling of these
functions to a worker thread. All these functions deallocate
resources related to the transport itself.

Furthermore, an unused callback was removed to simplify the
cleanup.

Multiple customers have been hitting this issue when using
VMware tools on vSphere 2015.

Also added a version to the vmci transport module (starting from
1.0.2.0-k since up until now it appears that this module was
sharing version with vsock that is currently at 1.0.1.0-k).
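
The deferral pattern described above, sketched with illustrative names (not
the exact driver code): the destructor side only queues the transport for
cleanup, and a worker releases the blocking resources from process context.

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct example_transport {
	struct list_head elem;
	/* resources that may only be released in process context */
};

static LIST_HEAD(example_cleanup_list);
static DEFINE_SPINLOCK(example_cleanup_lock);

static void example_cleanup(struct work_struct *work)
{
	struct example_transport *t, *tmp;
	LIST_HEAD(pending);

	spin_lock_bh(&example_cleanup_lock);
	list_replace_init(&example_cleanup_list, &pending);
	spin_unlock_bh(&example_cleanup_lock);

	list_for_each_entry_safe(t, tmp, &pending, elem) {
		list_del(&t->elem);
		/* release queue pairs, event subscriptions, etc. here */
		kfree(t);
	}
}
static DECLARE_WORK(example_cleanup_work, example_cleanup);

/* Called from the socket destructor, possibly from softirq context. */
static void example_transport_destruct(struct example_transport *t)
{
	spin_lock_bh(&example_cleanup_lock);
	list_add(&t->elem, &example_cleanup_list);
	spin_unlock_bh(&example_cleanup_lock);

	schedule_work(&example_cleanup_work);
}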

Reviewed-by: Aditya Asarwade <asarw...@vmware.com>
Reviewed-by: Thomas Hellstrom <thellst...@vmware.com>
Signed-off-by: Jorgen Hansen <jhan...@vmware.com>
---
 net/vmw_vsock/vmci_transport.c |  173 +++-
 net/vmw_vsock/vmci_transport.h |4 +-
 2 files changed, 86 insertions(+), 91 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 1f63daf..5243ce2 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -40,13 +40,11 @@
 
 static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
 static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
-static void vmci_transport_peer_attach_cb(u32 sub_id,
- const struct vmci_event_data *ed,
- void *client_data);
 static void vmci_transport_peer_detach_cb(u32 sub_id,
  const struct vmci_event_data *ed,
  void *client_data);
 static void vmci_transport_recv_pkt_work(struct work_struct *work);
+static void vmci_transport_cleanup(struct work_struct *work);
 static int vmci_transport_recv_listen(struct sock *sk,
  struct vmci_transport_packet *pkt);
 static int vmci_transport_recv_connecting_server(
@@ -75,6 +73,10 @@ struct vmci_transport_recv_pkt_info {
struct vmci_transport_packet pkt;
 };
 
+static LIST_HEAD(vmci_transport_cleanup_list);
+static DEFINE_SPINLOCK(vmci_transport_cleanup_lock);
+static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup);
+
 static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
   VMCI_INVALID_ID };
 static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
@@ -791,44 +793,6 @@ out:
return err;
 }
 
-static void vmci_transport_peer_attach_cb(u32 sub_id,
- const struct vmci_event_data *e_data,
- void *client_data)
-{
-   struct sock *sk = client_data;
-   const struct vmci_event_payload_qp *e_payload;
-   struct vsock_sock *vsk;
-
-   e_payload = vmci_event_data_const_payload(e_data);
-
-   vsk = vsock_sk(sk);
-
-   /* We don't ask for delayed CBs when we subscribe to this event (we
-* pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
-* guarantees in that case about what context we might be running in,
-* so it could be BH or process, blockable or non-blockable.  So we
-* need to account for all possible contexts here.
-*/
-   local_bh_disable();
-   bh_lock_sock(sk);
-
-   /* XXX This is lame, we should provide a way to lookup sockets by
-* qp_handle.
-*/
-   if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
-e_payload->handle)) {
-   /* XXX This doesn't do anything, but in the future we may want
-* to set a flag here to verify the attach really did occur and
-* we weren't just sent a datagram claiming it was.
-*/
-   goto out;
-   }
-
-out:
-   bh_unlock_sock(sk);
-   local_bh_enable();
-}
-
 static void vmci_transport_handle_detach(struct sock *sk)
 {
struct vsock_sock *vsk;
@@ -871,28 +835,38 @@ static void vmci_transport_peer_detach_cb(u32 sub_id,
  const struct vmci_event_data *e_data,
  void *client_data)
 {
-   struct sock *sk = client_data;
+   struct vmci_transport *trans = client_data;
const struct vmci_event_payload_qp *e_payload;
-   struct vsock_sock *vsk;
 
e_payload = vmci_event_data_const_payload(e_data);
-   vsk = vsock_sk(sk);
-   if (vmci_handle_is_invalid(e_payload->handle))
-   return;
-
-   /* Same rules for locking as for peer_attach_cb(). */
-   local_bh_disable();
-   bh_lock_sock(sk);
 
/* XXX This is lame, we should provide a way to lookup sock