Signed-off-by: Menglong Dong <[email protected]>
---
include/linux/skbuff.h | 32 ++++++++++++++++++++++++++++++++
include/trace/events/skb.h | 30 ++++++++++++++++++++++++++++++
net/core/dev.c | 2 ++
net/core/net-traces.c | 1 +
net/core/skbuff.c | 20 ++++++++++++++++++++
net/ipv4/tcp.c | 5 +++++
net/ipv4/tcp_input.c | 1 +
net/ipv4/tcp_ipv4.c | 2 ++
net/ipv4/tcp_output.c | 7 +++++++
9 files changed, 100 insertions(+)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 39f1d16f3628..77fcda96f1fd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1104,6 +1104,35 @@ struct sk_buff {
#endif
#define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff,
__mono_tc_offset)
+enum skb_latency_type {
+ /* the latency from the skb being queued in the send buffer to the
+ * skb is passed to L3 from L4. The latency in this case can be
+ * caused by the nagle.
+ */
+ SKB_LATENCY_SEND,
+ /* the latency from L3 to the skb entering the paccket scheduler
+ * in output path.
+ */
+ SKB_LATENCY_SCHED,
+ /* the latency from L3 to the skb entering the driver in output path */
+ SKB_LATENCY_NIC,
+ /* the latency from L3 to the skb being acknowledged by peer. This
+ * including the latency caused by delay ack. If the skb is
+ * retransmitted, this imply the last retransmitted skb.
+ */
+ SKB_LATENCY_ACK,
+ /* the latency from the driver to the skb entering the L4 in input path
*/
+ SKB_LATENCY_RECV,
+ /* the latency from the driver to the skb being peeked from the
+ * recv queue by the user in input path.
+ */
+ SKB_LATENCY_PICK,
+ SKB_LATENCY_MAX,
+};
+
+extern int skb_latency_regfunc(void);
+extern void skb_latency_unregfunc(void);
+
#ifdef __KERNEL__
/*
* Handling routines are only of interest to the kernel
@@ -4500,6 +4529,9 @@ static inline void skb_tx_timestamp(struct sk_buff *skb)
skb_tstamp_tx(skb, NULL);
}
+void skb_latency_notify(struct sk_buff *skb, struct sock *sk,
+ enum skb_latency_type type);
+
/**
* skb_complete_wifi_ack - deliver skb with wifi status
*
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index b877133cd93a..26825dac4347 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -92,6 +92,36 @@ TRACE_EVENT(skb_copy_datagram_iovec,
TP_printk("skbaddr=%p len=%d", __entry->skbaddr, __entry->len)
);
+TRACE_EVENT_FN(skb_latency,
+
+ TP_PROTO(struct sk_buff *skb, struct sock *sk, enum skb_latency_type
type),
+
+ TP_ARGS(skb, sk, type),
+
+ TP_STRUCT__entry(
+ __field(void *, skbaddr)
+ __field(void *, skaddr)
+ __field(u64, latency)
+ __field(enum skb_latency_type, type)
+ ),
+
+ TP_fast_assign(
+ __entry->skbaddr = skb;
+ __entry->skaddr = sk;
+ __entry->type = type;
+ __entry->latency = skb->tstamp ?
+ (skb->tstamp_type == SKB_CLOCK_REALTIME ?
+ net_timedelta(skb->tstamp) :
+ ktime_get_ns() - skb->tstamp) : 0;
+ ),
+
+ TP_printk("skbaddr=%p skaddr=%p type=%d latency=%lluns",
+ __entry->skbaddr, __entry->skaddr, __entry->type,
+ __entry->latency),
+
+ skb_latency_regfunc, skb_latency_unregfunc
+);
+
#endif /* _TRACE_SKB_H */
/* This part must be outside protection */
diff --git a/net/core/dev.c b/net/core/dev.c
index ea5fbcd133ae..ab036bbadc9a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4347,6 +4347,8 @@ int __dev_queue_xmit(struct sk_buff *skb, struct
net_device *sb_dev)
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
__skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
+ skb_latency_notify(skb, NULL, SKB_LATENCY_SCHED);
+
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index f2fa34b1d78d..a0c1e32a5928 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -57,6 +57,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_dead);
EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_cleanup_and_release);
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
+EXPORT_TRACEPOINT_SYMBOL_GPL(skb_latency);
EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 00afeb90c23a..041947d7049c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5521,6 +5521,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
{
struct sock *sk = skb->sk;
+ skb_latency_notify(skb, sk, SKB_LATENCY_NIC);
if (!skb_may_tx_timestamp(sk, false))
goto err;
@@ -5539,6 +5540,24 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+void skb_latency_notify(struct sk_buff *skb, struct sock *sk,
+ enum skb_latency_type type)
+{
+ trace_skb_latency(skb, sk, type);
+}
+EXPORT_SYMBOL_GPL(skb_latency_notify);
+
+int skb_latency_regfunc(void)
+{
+ net_enable_timestamp();
+ return 0;
+}
+
+void skb_latency_unregfunc(void)
+{
+ net_disable_timestamp();
+}
+
void __skb_tstamp_tx(struct sk_buff *orig_skb,
const struct sk_buff *ack_skb,
struct skb_shared_hwtstamps *hwtstamps,
@@ -5599,6 +5618,7 @@ EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
void skb_tstamp_tx(struct sk_buff *orig_skb,
struct skb_shared_hwtstamps *hwtstamps)
{
+ skb_latency_notify(orig_skb, NULL, SKB_LATENCY_NIC);
return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk,
SCM_TSTAMP_SND);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 82cc4a5633ce..b4e7bd9c4b6f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,6 +283,7 @@
#include <net/busy_poll.h>
#include <net/hotdata.h>
#include <trace/events/tcp.h>
+#include <trace/events/skb.h>
#include <net/rps.h>
#include "../core/devmem.h"
@@ -688,6 +689,7 @@ void tcp_skb_entail(struct sock *sk, struct sk_buff *skb)
tp->nonagle &= ~TCP_NAGLE_PUSH;
tcp_slow_start_after_idle_check(sk);
+ skb_set_delivery_time(skb, tp->tcp_clock_cache, SKB_CLOCK_MONOTONIC);
}
static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
@@ -1137,6 +1139,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr
*msg, size_t size)
/* Ok commence sending. */
copied = 0;
+ tcp_mstamp_refresh(tp);
restart:
mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -1318,6 +1321,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr
*msg, size_t size)
goto do_error;
mss_now = tcp_send_mss(sk, &size_goal, flags);
+ tcp_mstamp_refresh(tp);
}
out:
@@ -1519,6 +1523,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
{
+ skb_latency_notify(skb, sk, SKB_LATENCY_PICK);
__skb_unlink(skb, &sk->sk_receive_queue);
if (likely(skb->destructor == sock_rfree)) {
sock_rfree(skb);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cc05ec1faac8..99218ec5faa8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3293,6 +3293,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct
sk_buff *skb,
{
const struct skb_shared_info *shinfo;
+ skb_latency_notify(skb, sk, SKB_LATENCY_ACK);
/* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
return;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 985028434f64..de7a9fd7773a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,6 +85,7 @@
#include <linux/scatterlist.h>
#include <trace/events/tcp.h>
+#include <trace/events/skb.h>
#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key
*key,
@@ -2222,6 +2223,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (!sk)
goto no_tcp_socket;
+ skb_latency_notify(skb, sk, SKB_LATENCY_RECV);
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 08772395690d..db4bae23986f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -48,6 +48,7 @@
#include <linux/skbuff_ref.h>
#include <trace/events/tcp.h>
+#include <trace/events/skb.h>
/* Refresh clocks of a TCP socket,
* ensuring monotically increasing values.
@@ -2512,6 +2513,7 @@ static int tcp_mtu_probe(struct sock *sk)
skb = tcp_send_head(sk);
skb_copy_decrypted(nskb, skb);
mptcp_skb_ext_copy(nskb, skb);
+ nskb->tstamp = skb->tstamp;
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -2540,6 +2542,7 @@ static int tcp_mtu_probe(struct sock *sk)
break;
}
tcp_init_tso_segs(nskb, nskb->len);
+ skb_latency_notify(nskb, sk, SKB_LATENCY_SEND);
/* We're ready to send. If this fails, the probe will
* be resegmented into mss-sized pieces by tcp_write_xmit().
@@ -2827,6 +2830,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int
mss_now, int nonagle,
if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
break;
+ skb_latency_notify(skb, sk, SKB_LATENCY_SEND);
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
@@ -3616,6 +3620,7 @@ void tcp_send_fin(struct sock *sk)
/* FIN eats a sequence byte, write_seq advanced by
tcp_queue_skb(). */
tcp_init_nondata_skb(skb, tp->write_seq,
TCPHDR_ACK | TCPHDR_FIN);
+ skb_set_delivery_time(skb, tcp_clock_ns(), SKB_CLOCK_MONOTONIC);
tcp_queue_skb(sk, skb);
}
__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
@@ -4048,6 +4053,7 @@ static int tcp_send_syn_data(struct sock *sk, struct
sk_buff *syn)
goto done;
}
+ skb_set_delivery_time(syn_data, tp->tcp_clock_cache, SKB_CLOCK_MONOTONIC);
/* data was not sent, put it in write_queue */
__skb_queue_tail(&sk->sk_write_queue, syn_data);
tp->packets_out -= tcp_skb_pcount(syn_data);
@@ -4353,6 +4359,7 @@ int tcp_write_wakeup(struct sock *sk, int mib)
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(skb, mss);
+ skb_latency_notify(skb, sk, SKB_LATENCY_SEND);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (!err)