iPXE already assumes a conservative MTU in the TCP transmission code path: tcp_xmit_win() uses TCP_PATH_MTU, which is calculated assuming a network-layer MTU of 1280.
However, MTU is also used to determine the TCP Maximum Segment Size. Currently tcpip_mtu() is assuming the network layer can receive full length packets (1500 in the case of IPv4 over Ethernet). In some scenarios this is not a valid assumption, for example when the network is Ethernet encapsulated in something else. This is common in OpenStack virtualised networks. In those cases the administrator may need to configure a lower MTU than the default, either explicitly or using DHCP option 26. Signed-off-by: Dan Callaghan <dcall...@redhat.com> --- src/include/ipxe/dhcp.h | 3 +++ src/include/ipxe/netdevice.h | 1 + src/include/ipxe/settings.h | 2 ++ src/net/netdev_settings.c | 6 ++++++ src/net/netdevice.c | 22 ++++++++++++++++++++++ src/net/tcpip.c | 3 +-- src/net/udp/dhcp.c | 6 +++--- 7 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/include/ipxe/dhcp.h b/src/include/ipxe/dhcp.h index b699b31..189468a 100644 --- a/src/include/ipxe/dhcp.h +++ b/src/include/ipxe/dhcp.h @@ -83,6 +83,9 @@ struct dhcp_packet; /** Root path */ #define DHCP_ROOT_PATH 17 +/** MTU */ +#define DHCP_MTU 26 + /** Vendor encapsulated options */ #define DHCP_VENDOR_ENCAP 43 diff --git a/src/include/ipxe/netdevice.h b/src/include/ipxe/netdevice.h index a1d207f..4fb45c7 100644 --- a/src/include/ipxe/netdevice.h +++ b/src/include/ipxe/netdevice.h @@ -680,6 +680,7 @@ netdev_rx_frozen ( struct net_device *netdev ) { return ( netdev->state & NETDEV_RX_FROZEN ); } +extern size_t netdev_mtu ( struct net_device *netdev ); extern void netdev_rx_freeze ( struct net_device *netdev ); extern void netdev_rx_unfreeze ( struct net_device *netdev ); extern void netdev_link_err ( struct net_device *netdev, int rc ); diff --git a/src/include/ipxe/settings.h b/src/include/ipxe/settings.h index 341fc3c..2d8f241 100644 --- a/src/include/ipxe/settings.h +++ b/src/include/ipxe/settings.h @@ -466,6 +466,8 @@ mac_setting __setting ( SETTING_NETDEV, mac ); extern const struct setting busid_setting __setting ( 
SETTING_NETDEV, busid ); extern const struct setting +mtu_setting __setting ( SETTING_NETDEV, mtu ); +extern const struct setting user_class_setting __setting ( SETTING_HOST_EXTRA, user-class ); extern const struct setting manufacturer_setting __setting ( SETTING_HOST_EXTRA, manufacturer ); diff --git a/src/net/netdev_settings.c b/src/net/netdev_settings.c index 7d893a1..73d1d67 100644 --- a/src/net/netdev_settings.c +++ b/src/net/netdev_settings.c @@ -70,6 +70,12 @@ const struct setting ifname_setting __setting ( SETTING_NETDEV, ifname ) = { .description = "Interface name", .type = &setting_type_string, }; +const struct setting mtu_setting __setting ( SETTING_NETDEV, mtu ) = { + .name = "mtu", + .description = "Network-layer Maximum Transmission Unit (MTU)", + .type = &setting_type_uint16, + .tag = DHCP_MTU, +}; /** * Store MAC address setting diff --git a/src/net/netdevice.c b/src/net/netdevice.c index 9df2119..8e6e7f8 100644 --- a/src/net/netdevice.c +++ b/src/net/netdevice.c @@ -127,6 +127,28 @@ static void netdev_notify ( struct net_device *netdev ) { } /** + * Returns the network-layer Maximum Transmission Unit + * + * This is configurable at runtime with the "mtu" setting, either explicitly or + * through DHCP option 26. Otherwise uses the value determined by the driver + * (if any) or the protocol default. 
+ * + * @v Network device + * @ret Computed network-layer MTU + */ +size_t netdev_mtu ( struct net_device *netdev ) { + unsigned long mtu; + + /* Use MTU from settings if one is set */ + if ( fetch_uint_setting ( netdev_settings ( netdev ), &mtu_setting, + &mtu ) >= 0 ) + return mtu; + + /* Use default */ + return ( netdev->max_pkt_len - netdev->ll_protocol->ll_header_len ); +} + +/** * Freeze network device receive queue processing * * @v netdev Network device diff --git a/src/net/tcpip.c b/src/net/tcpip.c index c9e4ee7..9c439a5 100644 --- a/src/net/tcpip.c +++ b/src/net/tcpip.c @@ -144,8 +144,7 @@ size_t tcpip_mtu ( struct sockaddr_tcpip *st_dest ) { return 0; /* Calculate MTU */ - mtu = ( netdev->max_pkt_len - netdev->ll_protocol->ll_header_len - - tcpip_net->header_len ); + mtu = ( netdev_mtu ( netdev ) - tcpip_net->header_len ); return mtu; } diff --git a/src/net/udp/dhcp.c b/src/net/udp/dhcp.c index b9c1fd9..95f8082 100644 --- a/src/net/udp/dhcp.c +++ b/src/net/udp/dhcp.c @@ -91,9 +91,9 @@ static uint8_t dhcp_request_options_data[] = { DHCP_PARAMETER_REQUEST_LIST, DHCP_OPTION ( DHCP_SUBNET_MASK, DHCP_ROUTERS, DHCP_DNS_SERVERS, DHCP_LOG_SERVERS, DHCP_HOST_NAME, DHCP_DOMAIN_NAME, - DHCP_ROOT_PATH, DHCP_VENDOR_ENCAP, DHCP_VENDOR_CLASS_ID, - DHCP_TFTP_SERVER_NAME, DHCP_BOOTFILE_NAME, - DHCP_DOMAIN_SEARCH, + DHCP_ROOT_PATH, DHCP_MTU, DHCP_VENDOR_ENCAP, + DHCP_VENDOR_CLASS_ID, DHCP_TFTP_SERVER_NAME, + DHCP_BOOTFILE_NAME, DHCP_DOMAIN_SEARCH, 128, 129, 130, 131, 132, 133, 134, 135, /* for PXE */ DHCP_EB_ENCAP, DHCP_ISCSI_INITIATOR_IQN ), DHCP_END -- 2.7.4 _______________________________________________ ipxe-devel mailing list ipxe-devel@lists.ipxe.org https://lists.ipxe.org/mailman/listinfo.cgi/ipxe-devel