Hi Tim, On 7/10/25 01:53, Tim Harvey wrote: > On Tue, Apr 15, 2025 at 2:18 PM Jerome Forissier > <jerome.foriss...@linaro.org> wrote: >> >> The things that are done prior to executing a network command with >> NET_LWIP are not consistent with what is done with NET. It impacts the >> selection of the current device, and more precisely if the active device >> is invalid NET would return an error while NET_LWIP would try to pick a >> new device. This incorrect behavior was detected thanks to the eth_rotate >> sandbox test (dm_test_eth_rotate()). >> >> Fix it by re-using a sequence similar to what NET has in net_loop(). >> This piece of code is inserted in a function called net_lwip_eth_start() >> renamed from net_lwip_eth_set_current(). >> >> Signed-off-by: Jerome Forissier <jerome.foriss...@linaro.org> >> Reviewed-by: Simon Glass <s...@chromium.org> >> --- >> >> (no changes since v1) >> >> include/net-lwip.h | 9 ++++++++- >> net/lwip/dhcp.c | 3 ++- >> net/lwip/dns.c | 3 ++- >> net/lwip/net-lwip.c | 40 +++++++++++++++++++++++++++++----------- >> net/lwip/ping.c | 3 ++- >> net/lwip/tftp.c | 5 ++++- >> net/lwip/wget.c | 6 +++++- >> 7 files changed, 52 insertions(+), 17 deletions(-) >> >> diff --git a/include/net-lwip.h b/include/net-lwip.h >> index 64e5c720560..0d3bb8a8bd8 100644 >> --- a/include/net-lwip.h >> +++ b/include/net-lwip.h >> @@ -10,7 +10,14 @@ enum proto_t { >> TFTPGET >> }; >> >> -void net_lwip_set_current(void); >> +static inline int eth_is_on_demand_init(void) >> +{ >> + return 1; >> +} >> + >> +int eth_init_state_only(void); /* Set active state */ >> + >> +int net_lwip_eth_start(void); >> struct netif *net_lwip_new_netif(struct udevice *udev); >> struct netif *net_lwip_new_netif_noip(struct udevice *udev); >> void net_lwip_remove_netif(struct netif *netif); >> diff --git a/net/lwip/dhcp.c b/net/lwip/dhcp.c >> index 3b7e4700c6e..92bd7067a7f 100644 >> --- a/net/lwip/dhcp.c >> +++ b/net/lwip/dhcp.c >> @@ -115,7 +115,8 @@ int do_dhcp(struct cmd_tbl *cmdtp, int flag, int argc, >> char *const argv[]) >> int ret; >> struct udevice *dev; >> >> - net_lwip_set_current(); >> + if (net_lwip_eth_start() < 0) >> + return CMD_RET_FAILURE; >> >> dev = eth_get_dev(); >> if (!dev) { >> diff --git a/net/lwip/dns.c b/net/lwip/dns.c >> index 149bdb784dc..19172ac959a 100644 >> --- a/net/lwip/dns.c >> +++ b/net/lwip/dns.c >> @@ -121,7 +121,8 @@ int do_dns(struct cmd_tbl *cmdtp, int flag, int argc, >> char *const argv[]) >> if (argc == 3) >> var = argv[2]; >> >> - net_lwip_set_current(); >> + if (net_lwip_eth_start() < 0) >> + return CMD_RET_FAILURE; >> >> return dns_loop(eth_get_dev(), name, var); >> } >> diff --git a/net/lwip/net-lwip.c b/net/lwip/net-lwip.c >> index 6b7b696dbf0..6748008736b 100644 >> --- a/net/lwip/net-lwip.c >> +++ b/net/lwip/net-lwip.c >> @@ -134,18 +134,27 @@ static int get_udev_ipv4_info(struct udevice *dev, >> ip4_addr_t *ip, >> return 0; >> } >> >> -/* Initialize the lwIP stack and the ethernet devices and set current >> device */ >> -void net_lwip_set_current(void) >> +/* >> + * Initialize the network stack if needed and start the current device if >> valid >> + */ >> +int net_lwip_eth_start(void) >> { >> - static bool init_done; >> + int ret; >> >> - if (!init_done) { >> - eth_init_rings(); >> - eth_init(); >> - lwip_init(); >> - init_done = true; >> + net_init(); >> + if (eth_is_on_demand_init()) { >> + eth_halt(); >> + eth_set_current(); >> + ret = eth_init(); >> + if (ret < 0) { >> + eth_halt(); >> + return ret; >> + } >> + } else { >> + eth_init_state_only(); >> } >> - eth_set_current(); >> + >> + return 0; >> } >> >> static struct netif *new_netif(struct udevice *udev, bool with_ip) >> @@ -224,11 +233,20 @@ void net_lwip_remove_netif(struct netif *netif) >> free(netif); >> } >> >> +/* >> + * Initialize the network buffers, an ethernet device, and the lwIP stack >> + * (once). >> + */ >> int net_init(void) >> { >> - eth_set_current(); >> + static bool init_done; >> >> - net_lwip_new_netif(eth_get_dev()); >> + if (!init_done) { >> + eth_init_rings(); >> + eth_init(); >> + lwip_init(); >> + init_done = true; >> + } >> >> return 0; >> } >> diff --git a/net/lwip/ping.c b/net/lwip/ping.c >> index c586a96806d..542ef2cb148 100644 >> --- a/net/lwip/ping.c >> +++ b/net/lwip/ping.c >> @@ -168,7 +168,8 @@ int do_ping(struct cmd_tbl *cmdtp, int flag, int argc, >> char *const argv[]) >> if (!ipaddr_aton(argv[1], &addr)) >> return CMD_RET_USAGE; >> >> - net_lwip_set_current(); >> + if (net_lwip_eth_start() < 0) >> + return CMD_RET_FAILURE; >> >> if (ping_loop(eth_get_dev(), &addr) < 0) >> return CMD_RET_FAILURE; >> diff --git a/net/lwip/tftp.c b/net/lwip/tftp.c >> index 123d66b5dba..4f9b2049187 100644 >> --- a/net/lwip/tftp.c >> +++ b/net/lwip/tftp.c >> @@ -280,7 +280,10 @@ int do_tftpb(struct cmd_tbl *cmdtp, int flag, int argc, >> char *const argv[]) >> goto out; >> } >> >> - net_lwip_set_current(); >> + if (net_lwip_eth_start() < 0) { >> + ret = CMD_RET_FAILURE; >> + goto out; >> + } >> >> if (tftp_loop(eth_get_dev(), laddr, fname, srvip, port) < 0) >> ret = CMD_RET_FAILURE; >> diff --git a/net/lwip/wget.c b/net/lwip/wget.c >> index ec098148835..a3b82908877 100644 >> --- a/net/lwip/wget.c >> +++ b/net/lwip/wget.c >> @@ -471,7 +471,11 @@ static int wget_loop(struct udevice *udev, ulong >> dst_addr, char *uri) >> >> int wget_do_request(ulong dst_addr, char *uri) >> { >> - net_lwip_set_current(); >> + int ret; >> + >> + ret = net_lwip_eth_start(); >> + if (ret < 0) >> + return ret; >> >> if (!wget_info) >> wget_info = &default_wget_info; >> -- >> 2.43.0 >> > > Hi Jerome, > > I'm seeing an issue with NET_LWIP related to this patch. > > In net_lwip_eth_start the eth_init function is called first in > net_init then again within the if eth_is_on_demand. This causes a > situation where the netdev is started, then stopped and started again > and in the case of at least the dwc_eth_qos device this causes the phy > to drop link after negotiation has been performed the first time a > network operation occurs. > > The situation is this on an imx8mp_venice board for any network action > (dhcp, tftp, etc): > u-boot=> dhcp > ethernet@30bf0000 Waiting for PHY auto negotiation to complete...... done > No link > FAILED: -11 > eqos_start ethernet@30bf0000 > No link > FAILED: -11 > Could not start ethernet@30bf0000 > > Here is some debugging added: > lwip with eqos: > u-boot=> dhcp > net_lwip_eth_start > eth_start_udev ethernet@30bf0000 running=0 > eqos_start ethernet@30bf0000 phy=null > eqos_start ethernet@30bf0000 calling phy_connect > eqos_start ethernet@30bf0000 connected to phy ethernet@30bf0000 > eqos_start ethernet@30bf0000 calling phy_config > ethernet@30bf0000 Waiting for PHY auto negotiation to complete....... done > eth_start_udev ethernet@30bf0000 done running=1 > ^^^ this was a result of eth_init being called in net_init > net_lwip_eth_start calling eth_halt > eth_halt in_init_halt=0 > eth_halt current=ethernet@30bf0000 > eth_halt running=1 > ^^^ now, because we are already running halt continues on to halt the > interface > eth_halt ethernet@30bf0000 calling stop > eqos_stop ethernet@30bf0000 > phy_shutdown ethernet@30bf0000 > ^^^ phy is shutdown here > net_lwip_eth_start calling eth_set_current > net_lwip_eth_start calling eth_init > eth_start_udev ethernet@30bf0000 running=0 > eqos_start ethernet@30bf0000 phy=ethernet@30bf0000 > ^^^ phy links again but link has not been established by the time we > try to use it here > No link > phy_shutdown ethernet@30bf0000 > FAILED: -11 > eth_start_udev ethernet@30bf0000 running=0 > eqos_start ethernet@30bf0000 phy=ethernet@30bf0000 > eth_start_udev ethernet@30bf0000 done running=1 > DHCP client bound to address 172.24.23.42 (1110 ms) > u-boot=> dhcp > net_lwip_eth_start > net_lwip_eth_start calling eth_halt > eth_halt in_init_halt=0 > eth_halt current=ethernet@30bf0000 > eth_halt running=1 > eth_halt ethernet@30bf0000 calling stop > eqos_stop ethernet@30bf0000 > phy_shutdown ethernet@30bf0000 > net_lwip_eth_start calling eth_set_current > net_lwip_eth_start calling eth_init > eth_start_udev ethernet@30bf0000 running=0 > eqos_start ethernet@30bf0000 phy=ethernet@30bf0000 > eth_start_udev ethernet@30bf0000 done running=1 > eth_start_udev ethernet@30bf0000 running=1 > DHCP client bound to address 172.24.23.42 (29 ms) > ^^^ any follow-on network operation is fine as the PHY has re-linked > at this point > > legacy: > u-boot=> dhcp > net_loop calling eth_halt() > eth_halt in_init_halt=0 > eth_halt current=ethernet@30bf0000 > eth_halt running=0 > ^^^ does not call stop because its not running because eth_init hasn't > been called already > eqos_start ethernet@30bf0000 > DP83867 ethernet@30bf0000 Waiting for PHY auto negotiation to > complete....... done > ^^^ works > > Here's another board (imx8mm_venice) which has a different MAC (fec) > that works fine with this patch: > u-boot=> dhcp > net_lwip_eth_start > net_init init_done=0 > eth_start_udev ethernet@30be0000 running=0 > fecmxc_init ethernet@30be0000 > ethernet@30be0000 Waiting for PHY auto negotiation to complete.... done > eth_start_udev ethernet@30be0000 done running=1 > net_lwip_eth_start calling eth_halt > eth_halt in_init_halt=0 > eth_halt current=ethernet@30be0000 > eth_halt running=1 > eth_halt ethernet@30be0000 calling stop > fecmxc_halt ethernet@30be0000 > ^^^ halt is called but for this MAC it doesn't drop the link > net_lwip_eth_start calling eth_set_current > net_lwip_eth_start calling eth_init > eth_start_udev ethernet@30be0000 running=0 > fecmxc_init ethernet@30be0000 > eth_start_udev ethernet@30be0000 done running=1 > eth_start_udev ethernet@30be0000 running=1 > DHCP client bound to address 172.24.23.44 (1117 ms) > > The difference here is the FEC mac driver (fec_mxc.c) is not disturbed > by a start->stop->start. I'm not exactly sure why this is... I thought > it was because the FEC driver configures the PHY in the probe vs the > start but even if I remove the phy_shutdown call from the EQOS driver > it still drops the link on eqos_stop. > > Removing the call the eth_init from net_init makes NET_LWIP behave > like legacy and makes more sense as you don't get a > 'start->stop->start' call sequence to your netdev but honestly I'm not > sure if this also points out an issue with either the fec driver or > the eqos driver regarding where/how the PHY is handled. > > Would you agree with: > > net: lwip: remove eth_init from net_init as its called later > > The call to eth_init within net_init casues the network interface to
causes > start, stop, start again which can cause issues with certain network > device drivers. Remove it to make it behave like the legacy network > path. > > diff --git a/net/lwip/net-lwip.c b/net/lwip/net-lwip.c > index 3918d57d7e58..d53faa39ba11 100644 > --- a/net/lwip/net-lwip.c > +++ b/net/lwip/net-lwip.c > @@ -285,7 +285,6 @@ int net_init(void) > > if (!init_done) { > eth_init_rings(); > - eth_init(); > lwip_init(); > init_done = true; > } Yes, that looks good to me. Please also update the comment: diff --git a/net/lwip/net-lwip.c b/net/lwip/net-lwip.c index f05c4cd3f64..505d002e185 100644 --- a/net/lwip/net-lwip.c +++ b/net/lwip/net-lwip.c @@ -237,8 +237,7 @@ void net_lwip_remove_netif(struct netif *netif) } /* - * Initialize the network buffers, an ethernet device, and the lwIP stack - * (once). + * Initialize the network buffers and the lwIP stack (once). */ int net_init(void) { While looking at this I also noted that with NET_LWIP eth_is_on_demand_init() is always true so net_lwip_eth_start() could be simplified. Feel free to add a patch for that too :) Thanks, -- Jerome > > Best Regards, > > Tim