Re: [RFC PATCH 02/14] packet: implement PACKET_MEMREG setsockopt

2017-11-03 Thread Björn Töpel
2017-11-03 4:00 GMT+01:00 Willem de Bruijn :
> On Tue, Oct 31, 2017 at 9:41 PM, Björn Töpel  wrote:
>> From: Björn Töpel 
>>
>> Here, the PACKET_MEMREG setsockopt is implemented for the AF_PACKET
>> protocol family. PACKET_MEMREG allows the user to register memory
>> regions that can be used by AF_PACKET V4 as packet data buffers.
>>
>> Signed-off-by: Björn Töpel 
>> ---
>> +/*** V4 QUEUE OPERATIONS ***/
>> +
>> +/**
>> + * tp4q_umem_new - Creates a new umem (packet buffer)
>> + *
>> + * @addr: The address to the umem
>> + * @size: The size of the umem
>> + * @frame_size: The size of each frame, between 2K and PAGE_SIZE
>> + * @data_headroom: The desired data headroom before start of the packet
>> + *
>> + * Returns a pointer to the new umem or NULL for failure
>> + **/
>> +static inline struct tp4_umem *tp4q_umem_new(unsigned long addr, size_t 
>> size,
>> +unsigned int frame_size,
>> +unsigned int data_headroom)
>> +{
>> +   struct tp4_umem *umem;
>> +   unsigned int nframes;
>> +
>> +   if (frame_size < TP4_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
>> +   /* Strictly speaking we could support this, if:
>> +* - huge pages, or*
>> +* - using an IOMMU, or
>> +* - making sure the memory area is consecutive
>> +* but for now, we simply say "computer says no".
>> +*/
>> +   return ERR_PTR(-EINVAL);
>> +   }
>> +
>> +   if (!is_power_of_2(frame_size))
>> +   return ERR_PTR(-EINVAL);
>> +
>> +   if (!PAGE_ALIGNED(addr)) {
>> +   /* Memory area has to be page size aligned. For
>> +* simplicity, this might change.
>> +*/
>> +   return ERR_PTR(-EINVAL);
>> +   }
>> +
>> +   if ((addr + size) < addr)
>> +   return ERR_PTR(-EINVAL);
>> +
>> +   nframes = size / frame_size;
>> +   if (nframes == 0)
>> +   return ERR_PTR(-EINVAL);
>> +
>> +   data_headroom = ALIGN(data_headroom, 64);
>> +
>> +   if (frame_size - data_headroom - TP4_KERNEL_HEADROOM < 0)
>> +   return ERR_PTR(-EINVAL);
>
> signed comparison on unsigned int

Thanks, will address in next revision!


Re: [RFC PATCH 02/14] packet: implement PACKET_MEMREG setsockopt

2017-11-02 Thread Willem de Bruijn
On Tue, Oct 31, 2017 at 9:41 PM, Björn Töpel  wrote:
> From: Björn Töpel 
>
> Here, the PACKET_MEMREG setsockopt is implemented for the AF_PACKET
> protocol family. PACKET_MEMREG allows the user to register memory
> regions that can be used by AF_PACKET V4 as packet data buffers.
>
> Signed-off-by: Björn Töpel 
> ---
> +/*** V4 QUEUE OPERATIONS ***/
> +
> +/**
> + * tp4q_umem_new - Creates a new umem (packet buffer)
> + *
> + * @addr: The address to the umem
> + * @size: The size of the umem
> + * @frame_size: The size of each frame, between 2K and PAGE_SIZE
> + * @data_headroom: The desired data headroom before start of the packet
> + *
> + * Returns a pointer to the new umem or NULL for failure
> + **/
> +static inline struct tp4_umem *tp4q_umem_new(unsigned long addr, size_t size,
> +unsigned int frame_size,
> +unsigned int data_headroom)
> +{
> +   struct tp4_umem *umem;
> +   unsigned int nframes;
> +
> +   if (frame_size < TP4_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
> +   /* Strictly speaking we could support this, if:
> +* - huge pages, or*
> +* - using an IOMMU, or
> +* - making sure the memory area is consecutive
> +* but for now, we simply say "computer says no".
> +*/
> +   return ERR_PTR(-EINVAL);
> +   }
> +
> +   if (!is_power_of_2(frame_size))
> +   return ERR_PTR(-EINVAL);
> +
> +   if (!PAGE_ALIGNED(addr)) {
> +   /* Memory area has to be page size aligned. For
> +* simplicity, this might change.
> +*/
> +   return ERR_PTR(-EINVAL);
> +   }
> +
> +   if ((addr + size) < addr)
> +   return ERR_PTR(-EINVAL);
> +
> +   nframes = size / frame_size;
> +   if (nframes == 0)
> +   return ERR_PTR(-EINVAL);
> +
> +   data_headroom = ALIGN(data_headroom, 64);
> +
> +   if (frame_size - data_headroom - TP4_KERNEL_HEADROOM < 0)
> +   return ERR_PTR(-EINVAL);

signed comparison on unsigned int


[RFC PATCH 02/14] packet: implement PACKET_MEMREG setsockopt

2017-10-31 Thread Björn Töpel
From: Björn Töpel 

Here, the PACKET_MEMREG setsockopt is implemented for the AF_PACKET
protocol family. PACKET_MEMREG allows the user to register memory
regions that can be used by AF_PACKET V4 as packet data buffers.

Signed-off-by: Björn Töpel 
---
 include/linux/tpacket4.h | 101 +
 net/packet/af_packet.c   | 163 +++
 net/packet/internal.h|   4 ++
 3 files changed, 268 insertions(+)
 create mode 100644 include/linux/tpacket4.h

diff --git a/include/linux/tpacket4.h b/include/linux/tpacket4.h
new file mode 100644
index ..fcf4c333c78d
--- /dev/null
+++ b/include/linux/tpacket4.h
@@ -0,0 +1,101 @@
+/*
+ *  tpacket v4
+ *  Copyright(c) 2017 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_TPACKET4_H
+#define _LINUX_TPACKET4_H
+
+#define TP4_UMEM_MIN_FRAME_SIZE 2048 /* Smallest frame size accepted */
+#define TP4_KERNEL_HEADROOM 256 /* Headroom for XDP */
+
+struct tp4_umem { /* describes one registered packet buffer area */
+   struct pid *pid; /* pid of the task that created the umem */
+   struct page **pgs; /* page array, each entry is put on unpin */
+   unsigned int npgs; /* number of entries in pgs */
+   size_t size; /* size of the umem as given at creation */
+   unsigned long address; /* start address, page aligned */
+   unsigned int frame_size; /* size of each frame, a power of two */
+   unsigned int frame_size_log2; /* ilog2(frame_size) */
+   unsigned int nframes; /* size / frame_size */
+   unsigned int nfpplog2; /* num frames per page in log2 */
+   unsigned int data_headroom; /* requested headroom, rounded up to 64 */
+};
+
+/*** V4 QUEUE OPERATIONS ***/
+
+/**
+ * tp4q_umem_new - Creates a new umem (packet buffer)
+ *
+ * @addr: The address to the umem, must be page aligned
+ * @size: The size of the umem, must hold at least one frame
+ * @frame_size: The size of each frame, power of 2 between 2K and PAGE_SIZE
+ * @data_headroom: The desired data headroom before start of the packet
+ *
+ * Returns the new umem, or ERR_PTR(-EINVAL)/ERR_PTR(-ENOMEM) on failure
+ **/
+static inline struct tp4_umem *tp4q_umem_new(unsigned long addr, size_t size,
+					     unsigned int frame_size,
+					     unsigned int data_headroom)
+{
+	struct tp4_umem *umem;
+	unsigned int nframes;
+
+	if (frame_size < TP4_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
+		/* Strictly speaking we could support this, if:
+		 * - huge pages, or
+		 * - using an IOMMU, or
+		 * - making sure the memory area is consecutive
+		 * but for now, we simply say "computer says no".
+		 */
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!is_power_of_2(frame_size))
+		return ERR_PTR(-EINVAL);
+
+	/* Memory area has to be page size aligned (this might change). */
+	if (!PAGE_ALIGNED(addr))
+		return ERR_PTR(-EINVAL);
+
+	if ((addr + size) < addr)
+		return ERR_PTR(-EINVAL);
+
+	/* nframes is an unsigned int: reject counts that would truncate. */
+	nframes = size / frame_size;
+	if (nframes == 0 || nframes != size / frame_size)
+		return ERR_PTR(-EINVAL);
+
+	/* Unsigned math: bound before ALIGN() so a huge headroom cannot wrap. */
+	if (data_headroom > frame_size - TP4_KERNEL_HEADROOM)
+		return ERR_PTR(-EINVAL);
+	data_headroom = ALIGN(data_headroom, 64);
+	if (data_headroom > frame_size - TP4_KERNEL_HEADROOM)
+		return ERR_PTR(-EINVAL);
+
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	umem->pid = get_task_pid(current, PIDTYPE_PID);
+	umem->size = size;
+	umem->address = addr;
+	umem->frame_size = frame_size;
+	umem->frame_size_log2 = ilog2(frame_size);
+	umem->nframes = nframes;
+	umem->nfpplog2 = ilog2(PAGE_SIZE / frame_size);
+	umem->data_headroom = data_headroom;
+
+	return umem;
+}
+
+#endif /* _LINUX_TPACKET4_H */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9603f6ff17a4..b39be424ec0e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -89,11 +89,15 @@
 #include 
 #include 
 #include 
+#include 
 #ifdef CONFIG_INET
 #include 
 #endif
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "internal.h"
 
@@ -2975,6 +2979,132 @@ static int packet_sendmsg(struct socket *sock, struct 
msghdr *msg, size_t len)
return packet_snd(sock, msg, len);
 }
 
+/* Dirty and put every page held in umem->pgs, then free the array itself. */
+static void packet_umem_unpin_pages(struct tp4_umem *umem)
+{
+	struct page **pinned = umem->pgs;
+	unsigned int idx = 0;
+
+	while (idx < umem->npgs) {
+		set_page_dirty_lock(pinned[idx]);
+		put_page(pinned[idx]);
+		idx++;
+	}
+	kfree(pinned);
+	umem->pgs = NULL;
+}
+
+static void
+packet_umem_free(struct tp4_umem *umem)
+{
+   struct mm_struct *mm;
+   struct