On 6/15/21 5:45 AM, menglong8.d...@gmail.com wrote:
From: Menglong Dong <dong.mengl...@zte.com.cn>
FB_MTU is used in 'tipc_msg_build()' to allocate a smaller skb when memory
allocation fails, which can avoid unnecessary sending failures.
The value of FB_MTU is currently 3744, so the data size will be:

(3744 + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \
 SKB_DATA_ALIGN(BUF_HEADROOM + BUF_TAILROOM + 3))

which is larger than one page (4096), so two pages will be allocated.
To avoid this, replace '3744' with a calculation:

FB_MTU=(PAGE_SIZE - \
        SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - \
        SKB_DATA_ALIGN(BUF_HEADROOM + BUF_TAILROOM + \
                       EHDR_MAX_SIZE) \
       )

which is used for crypto skbs, and:

FB_MTU_LOCAL=(PAGE_SIZE - SKB_DATA_ALIGN(BUF_HEADROOM) - \
              SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
             )

which is used for local messages. BUF_HEADROOM is now defined as the
non-crypto version.
What's more, alloc_skb_fclone() already calls SKB_DATA_ALIGN() on the data
size, so the alignment of buf_size in tipc_buf_acquire() is no longer
necessary. So, just remove it.
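
For illustration, a rough back-of-the-envelope check of the old value (the
exact figures are config dependent; LL_MAX_HEADER = 128, EHDR_MAX_SIZE = 60,
TIPC_AES_GCM_TAG_SIZE = 16, sizeof(struct skb_shared_info) = 320 and
SMP_CACHE_BYTES = 64 are only assumptions for this example):

    /* userspace sketch of the allocation size described above */
    #include <stdio.h>

    #define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))
    #define SKB_DATA_ALIGN(x) ALIGN((x), 64)          /* SMP_CACHE_BYTES */

    int main(void)
    {
            int headroom = ALIGN(128 + 48 + 60, 16);  /* old crypto BUF_HEADROOM */
            int tailroom = 16;                        /* TIPC_AES_GCM_TAG_SIZE   */
            int size = 3744 + SKB_DATA_ALIGN(320) +
                       SKB_DATA_ALIGN(headroom + tailroom + 3);

            printf("%d\n", size);                     /* prints 4384 > 4096 */
            return 0;
    }

So, under these assumptions, the allocation spills over one page and ends up
in the kmalloc-8k slab, i.e. two pages, which is what the new PAGE_SIZE-based
definitions avoid.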
Fixes: 4c94cc2d3d57 ("tipc: fall back to smaller MTU if allocation of local send skb fails")
Signed-off-by: Menglong Dong <dong.mengl...@zte.com.cn>
---
V3:
- split tipc_msg_build to tipc_msg_build and tipc_msg_frag
- introduce tipc_buf_acquire_flex, which is able to alloc skb for
local message
- add the variate 'local' in tipc_msg_build to check if this is a
local message.
V2:
- define FB_MTU in msg.c instead of introduce a new file
- remove align for buf_size in tipc_buf_acquire()
---
net/tipc/bcast.c | 2 +-
net/tipc/msg.c | 168 +++++++++++++++++++++++++++++------------------
net/tipc/msg.h | 3 +-
3 files changed, 108 insertions(+), 65 deletions(-)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index d4beca895992..9daace9542f4 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -699,7 +699,7 @@ int tipc_bcast_init(struct net *net)
spin_lock_init(&tipc_net(net)->bclock);
if (!tipc_link_bc_create(net, 0, 0, NULL,
- FB_MTU,
+ fb_mtu,
BCLINK_WIN_DEFAULT,
BCLINK_WIN_DEFAULT,
0,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ce6ab54822d8..349107e08d6f 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -42,19 +42,46 @@
#include "crypto.h"
#define MAX_FORWARD_SIZE 1024
+#define BUF_HEADROOM ALIGN(LL_MAX_HEADER + 48, 16)
+
#ifdef CONFIG_TIPC_CRYPTO
-#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
-#define BUF_TAILROOM (TIPC_AES_GCM_TAG_SIZE)
+#define BUF_TAILROOM TIPC_AES_GCM_TAG_SIZE
#else
-#define BUF_HEADROOM (LL_MAX_HEADER + 48)
-#define BUF_TAILROOM 16
+#define EHDR_MAX_SIZE 0
+#define BUF_TAILROOM 0
#endif
We need either a comment or a name that explains why we are doing all
this, i.e., that we want a buffer that fits within one page.
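Something along these lines would do (wording is only a suggestion):

    /* FB_MTU: fallback MTU, chosen so that the fallback buffer,
     * including headroom, tailroom and the skb_shared_info overhead,
     * still fits within a single page.
     */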
+#define FB_MTU (PAGE_SIZE - \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - \
+ SKB_DATA_ALIGN(BUF_HEADROOM + BUF_TAILROOM + \
+ EHDR_MAX_SIZE) \
+ )
+
+#define FB_MTU_LOCAL (PAGE_SIZE - SKB_DATA_ALIGN(BUF_HEADROOM) - \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+ )
+
+const int fb_mtu = FB_MTU;
+
static unsigned int align(unsigned int i)
{
return (i + 3) & ~3u;
}
This one should be completely replaced with the ALIGN() macro, or maybe we
should define our own ALIGN4() macro based on it, unless one already exists.
But that should be a separate patch.
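Something as simple as this should do in that patch (just a sketch, reusing
the kernel's existing ALIGN()):

    #define ALIGN4(x)  ALIGN((x), 4)

so that align(i) can be dropped in favor of ALIGN4(i) at the call sites.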
+static inline struct sk_buff *tipc_alloc_skb(int headroom, int tailroom,
+ int size, gfp_t gfp)
+{
+ struct sk_buff *skb;
+
+ skb = alloc_skb_fclone(size + headroom + tailroom, gfp);
+ if (skb) {
+ skb_reserve(skb, headroom);
+ skb_put(skb, size);
+ skb->next = NULL;
+ }
+ return skb;
+}
+
/**
* tipc_buf_acquire - creates a TIPC message buffer
* @size: message size (including TIPC header)
@@ -68,20 +95,17 @@ static unsigned int align(unsigned int i)
*/
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
{
- struct sk_buff *skb;
-#ifdef CONFIG_TIPC_CRYPTO
- unsigned int buf_size = (BUF_HEADROOM + size + BUF_TAILROOM + 3) & ~3u;
-#else
- unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
-#endif
+ return tipc_alloc_skb(BUF_HEADROOM + EHDR_MAX_SIZE,
+ BUF_TAILROOM, size, gfp);
+}
So far, so good.
- skb = alloc_skb_fclone(buf_size, gfp);
- if (skb) {
- skb_reserve(skb, BUF_HEADROOM);
- skb_put(skb, size);
- skb->next = NULL;
- }
- return skb;
+static struct sk_buff *tipc_buf_acquire_flex(u32 size, bool local,
+ gfp_t gfp)
I feel this function is overkill and way too intrusive for my comfort,
especially for the marginal fix this is meant to be.
We may well save a few bytes when allocating local buffers, but most
buffers are not so close to the page limit that it makes any difference.
And even if we happen to save a page now and then, it is not worth it.
Stick to tipc_buf_acquire() where it is used now, so we don't have to
change any more code.
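I.e., keep the existing calls in the fragmentation path:

    /* Prepare first fragment */
    skb = tipc_buf_acquire(pktmax, GFP_KERNEL);

and likewise tipc_buf_acquire(pktsz, GFP_KERNEL) for the following
fragments, instead of routing them through tipc_buf_acquire_flex().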
BR
///jon
+{
+ if (local)
+ return tipc_alloc_skb(BUF_HEADROOM, 0, size, gfp);
+ else
+ return tipc_buf_acquire(size, gfp);
}
void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type,
@@ -357,26 +381,12 @@ int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
return -ENOMEM;
}
-/**
- * tipc_msg_build - create buffer chain containing specified header and data
- * @mhdr: Message header, to be prepended to data
- * @m: User message
- * @offset: buffer offset for fragmented messages (FIXME)
- * @dsz: Total length of user data
- * @pktmax: Max packet size that can be used
- * @list: Buffer or chain of buffers to be returned to caller
- *
- * Note that the recursive call we are making here is safe, since it can
- * logically go only one further level down.
- *
- * Return: message data size or errno: -ENOMEM, -EFAULT
- */
-int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
- int dsz, int pktmax, struct sk_buff_head *list)
+static int tipc_msg_frag(struct tipc_msg *mhdr, struct msghdr *m, int dsz,
+ int pktmax, struct sk_buff_head *list,
+ bool local)
Same comment as above.
{
int mhsz = msg_hdr_sz(mhdr);
struct tipc_msg pkthdr;
- int msz = mhsz + dsz;
int pktrem = pktmax;
struct sk_buff *skb;
int drem = dsz;
@@ -385,33 +395,6 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
int pktsz;
int rc;
- msg_set_size(mhdr, msz);
-
- /* No fragmentation needed? */
- if (likely(msz <= pktmax)) {
- skb = tipc_buf_acquire(msz, GFP_KERNEL);
-
- /* Fall back to smaller MTU if node local message */
- if (unlikely(!skb)) {
- if (pktmax != MAX_MSG_SIZE)
- return -ENOMEM;
- rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
- if (rc != dsz)
- return rc;
- if (tipc_msg_assemble(list))
- return dsz;
- return -ENOMEM;
- }
- skb_orphan(skb);
- __skb_queue_tail(list, skb);
- skb_copy_to_linear_data(skb, mhdr, mhsz);
- pktpos = skb->data + mhsz;
- if (copy_from_iter_full(pktpos, dsz, &m->msg_iter))
- return dsz;
- rc = -EFAULT;
- goto error;
- }
-
/* Prepare reusable fragment header */
tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER,
FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr));
@@ -420,7 +403,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
msg_set_importance(&pkthdr, msg_importance(mhdr));
/* Prepare first fragment */
- skb = tipc_buf_acquire(pktmax, GFP_KERNEL);
+ skb = tipc_buf_acquire_flex(pktmax, local, GFP_KERNEL);
if (!skb)
return -ENOMEM;
skb_orphan(skb);
@@ -451,7 +434,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
pktsz = drem + INT_H_SIZE;
else
pktsz = pktmax;
- skb = tipc_buf_acquire(pktsz, GFP_KERNEL);
+ skb = tipc_buf_acquire_flex(pktsz, local, GFP_KERNEL);
if (!skb) {
rc = -ENOMEM;
goto error;
@@ -474,6 +457,65 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
return rc;
}
+/**
+ * tipc_msg_build - create buffer chain containing specified header and data
+ * @mhdr: Message header, to be prepended to data
+ * @m: User message
+ * @offset: buffer offset for fragmented messages (FIXME)
+ * @dsz: Total length of user data
+ * @pktmax: Max packet size that can be used
+ * @list: Buffer or chain of buffers to be returned to caller
+ *
+ * Note that the recursive call we are making here is safe, since it can
+ * logically go only one further level down.
+ *
+ * Return: message data size or errno: -ENOMEM, -EFAULT
+ */
+int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
+ int dsz, int pktmax, struct sk_buff_head *list)
+{
+ int mhsz = msg_hdr_sz(mhdr);
+ int msz = mhsz + dsz;
+ struct sk_buff *skb;
+ bool local = false;
+ char *pktpos;
+ int rc;
+
+ msg_set_size(mhdr, msz);
+ if (pktmax == MAX_MSG_SIZE)
+ local = true;
+
+ /* Fragmentation needed */
+ if (unlikely(msz > pktmax))
+ return tipc_msg_frag(mhdr, m, dsz, pktmax, list, local);
+
+ skb = tipc_buf_acquire_flex(msz, local, GFP_KERNEL);
+
+ /* Fall back to smaller MTU if node local message */
+ if (unlikely(!skb))
+ goto try_frag;
+
+ skb_orphan(skb);
+ skb_copy_to_linear_data(skb, mhdr, mhsz);
+ pktpos = skb->data + mhsz;
+ if (copy_from_iter_full(pktpos, dsz, &m->msg_iter)) {
+ __skb_queue_tail(list, skb);
+ return dsz;
+ }
+ __skb_queue_head_init(list);
+ return -EFAULT;
+
+try_frag:
+ if (!local)
+ return -ENOMEM;
+ rc = tipc_msg_frag(mhdr, m, dsz, FB_MTU_LOCAL, list, true);
+ if (rc != dsz)
+ return rc;
+ if (tipc_msg_assemble(list))
+ return dsz;
+ return -ENOMEM;
+}
+
/**
* tipc_msg_bundle - Append contents of a buffer to tail of an existing one
* @bskb: the bundle buffer to append to
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 5d64596ba987..2c214691037c 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -99,9 +99,10 @@ struct plist;
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-#define FB_MTU 3744
#define TIPC_MEDIA_INFO_OFFSET 5
+extern const int fb_mtu;
+
struct tipc_skb_cb {
union {
struct {