Re: [PATCH 3/3] [NET] Add IP(V6)_PMTUDISC_PROBE
John Heffner [EMAIL PROTECTED] writes: Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces us not to fragment, but does not make use of the kernel path MTU discovery. That is, it allows for user-mode MTU probing (or, packetization-layer path MTU discovery). This is particularly useful for diagnostic utilities, like traceroute/tracepath. You should probably send a manpages update to the manpages maintainer too (cc'ed with fullquote) -Andi Signed-off-by: John Heffner [EMAIL PROTECTED] --- include/linux/in.h |1 + include/linux/in6.h |1 + include/linux/skbuff.h |3 ++- include/net/ip.h |2 +- net/core/skbuff.c|2 ++ net/ipv4/ip_output.c | 14 ++ net/ipv4/ip_sockglue.c |2 +- net/ipv4/raw.c |3 +++ net/ipv6/ip6_output.c| 12 net/ipv6/ipv6_sockglue.c |2 +- net/ipv6/raw.c |3 +++ 11 files changed, 33 insertions(+), 12 deletions(-) diff --git a/include/linux/in.h b/include/linux/in.h index 1912e7c..2dc1f8a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -83,6 +83,7 @@ struct in_addr { #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ #define IP_PMTUDISC_DO 2 /* Always DF */ +#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ #define IP_MULTICAST_IF 32 #define IP_MULTICAST_TTL 33 diff --git a/include/linux/in6.h b/include/linux/in6.h index 4e8350a..d559fac 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -179,6 +179,7 @@ struct in6_flowlabel_req #define IPV6_PMTUDISC_DONT 0 #define IPV6_PMTUDISC_WANT 1 #define IPV6_PMTUDISC_DO 2 +#define IPV6_PMTUDISC_PROBE 3 /* Flowlabel */ #define IPV6_FLOWLABEL_MGR 32 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4ff3940..64038b4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -284,7 +284,8 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + ign_dst_mtu; __be16 protocol; void (*destructor)(struct sk_buff *skb); diff --git a/include/net/ip.h b/include/net/ip.h
index e79c3e3..f5874a3 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -201,7 +201,7 @@ int ip_decrease_ttl(struct iphdr *iph) static inline int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) { - return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || + return (inet_sk(sk)->pmtudisc >= IP_PMTUDISC_DO || (inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT && !(dst_metric(dst, RTAX_LOCK)&(1<<RTAX_MTU)))); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 702fa8f..5c8515c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -474,6 +474,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) C(ipvs_property); #endif + C(ign_dst_mtu); C(protocol); n->destructor = NULL; C(mark); @@ -549,6 +550,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif + new->ign_dst_mtu = old->ign_dst_mtu; #ifdef CONFIG_BRIDGE_NETFILTER new->nf_bridge = old->nf_bridge; nf_bridge_get(old->nf_bridge); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 90bdd53..a7e8944 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -201,7 +201,8 @@ static inline int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) + if (skb->len > dst_mtu(skb->dst) && + !skb->ign_dst_mtu && !skb_is_gso(skb)) return ip_fragment(skb, ip_finish_output2); else return ip_finish_output2(skb); @@ -801,7 +802,9 @@ int ip_append_data(struct sock *sk, inet->cork.addr = ipc->addr; } dst_hold(&rt->u.dst); - inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); + inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? + rt->u.dst.dev->mtu : + dst_mtu(rt->u.dst.path); inet->cork.rt = rt; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; @@ -1220,13 +1223,16 @@ int ip_push_pending_frames(struct sock *sk)
Re: [PATCH 3/3] [NET] Add IP(V6)_PMTUDISC_PROBE
From: John Heffner [EMAIL PROTECTED] Date: Fri, 23 Mar 2007 20:06:46 -0400 Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces us not to fragment, but does not make use of the kernel path MTU discovery. That is, it allows for user-mode MTU probing (or, packetization-layer path MTU discovery). This is particularly useful for diagnostic utilities, like traceroute/tracepath. Signed-off-by: John Heffner [EMAIL PROTECTED] Also applied to net-2.6.22, thanks John. I made a slight change: diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4ff3940..64038b4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -284,7 +284,8 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + ign_dst_mtu; __be16 protocol; void (*destructor)(struct sk_buff *skb); I marked ign_dst_mtu with a bit field size of one, since this appears to be a boolean; I take it this is what you meant to do here. Otherwise it adds another __u8 to struct sk_buff. :-) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] [NET] Add IP(V6)_PMTUDISC_PROBE
Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces us not to fragment, but does not make use of the kernel path MTU discovery. That is, it allows for user-mode MTU probing (or, packetization-layer path MTU discovery). This is particularly useful for diagnostic utilities, like traceroute/tracepath. Signed-off-by: John Heffner [EMAIL PROTECTED] --- include/linux/in.h |1 + include/linux/in6.h |1 + include/linux/skbuff.h |3 ++- include/net/ip.h |2 +- net/core/skbuff.c|2 ++ net/ipv4/ip_output.c | 14 ++ net/ipv4/ip_sockglue.c |2 +- net/ipv4/raw.c |3 +++ net/ipv6/ip6_output.c| 12 net/ipv6/ipv6_sockglue.c |2 +- net/ipv6/raw.c |3 +++ 11 files changed, 33 insertions(+), 12 deletions(-) diff --git a/include/linux/in.h b/include/linux/in.h index 1912e7c..2dc1f8a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -83,6 +83,7 @@ struct in_addr { #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ #define IP_PMTUDISC_DO 2 /* Always DF */ +#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ #define IP_MULTICAST_IF 32 #define IP_MULTICAST_TTL 33 diff --git a/include/linux/in6.h b/include/linux/in6.h index 4e8350a..d559fac 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -179,6 +179,7 @@ struct in6_flowlabel_req #define IPV6_PMTUDISC_DONT 0 #define IPV6_PMTUDISC_WANT 1 #define IPV6_PMTUDISC_DO 2 +#define IPV6_PMTUDISC_PROBE 3 /* Flowlabel */ #define IPV6_FLOWLABEL_MGR 32 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4ff3940..64038b4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -284,7 +284,8 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + ign_dst_mtu; __be16 protocol; void (*destructor)(struct sk_buff *skb); diff --git a/include/net/ip.h b/include/net/ip.h index e79c3e3..f5874a3 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -201,7 +201,7 @@ int ip_decrease_ttl(struct iphdr *iph) static 
inline int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) { - return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || + return (inet_sk(sk)->pmtudisc >= IP_PMTUDISC_DO || (inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT && !(dst_metric(dst, RTAX_LOCK)&(1<<RTAX_MTU)))); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 702fa8f..5c8515c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -474,6 +474,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) C(ipvs_property); #endif + C(ign_dst_mtu); C(protocol); n->destructor = NULL; C(mark); @@ -549,6 +550,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif + new->ign_dst_mtu = old->ign_dst_mtu; #ifdef CONFIG_BRIDGE_NETFILTER new->nf_bridge = old->nf_bridge; nf_bridge_get(old->nf_bridge); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 90bdd53..a7e8944 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -201,7 +201,8 @@ static inline int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) + if (skb->len > dst_mtu(skb->dst) && + !skb->ign_dst_mtu && !skb_is_gso(skb)) return ip_fragment(skb, ip_finish_output2); else return ip_finish_output2(skb); @@ -801,7 +802,9 @@ int ip_append_data(struct sock *sk, inet->cork.addr = ipc->addr; } dst_hold(&rt->u.dst); - inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); + inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? + rt->u.dst.dev->mtu : + dst_mtu(rt->u.dst.path); inet->cork.rt = rt; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; @@ -1220,13 +1223,16 @@ int ip_push_pending_frames(struct sock *sk) * to fragment the frame generated here. No matter, what transforms * how transforms change size of the packet, it will come out. */ - if