On 07/09/21(Tue) 15:48, Stefan Sperling wrote:
> This patch makes iwx(4) resume reliably for me.
>
> There were missing splnet() calls which leads to an obvious race
> between the interrupt handler and the code which triggers firmware
> loading and then sleeps to wait for confirmation.
> This patch adds the missing splnet().
>
> However, even with splnet() protection added I need to add the
> following two lines of code to make the patch work reliably:
>
> /* wait for the firmware to load */
> for (w = 0; !sc->sc_uc.uc_intr && w < 10; w++) {
> err = tsleep_nsec(&sc->sc_uc, 0, "iwxuc", MSEC_TO_NSEC(100));
> + /* XXX This should not be needed, should it: */
> + if (err == EWOULDBLOCK && sc->sc_uc.uc_intr)
> + err = 0;
> }
> if (err || !sc->sc_uc.uc_ok)
> printf("%s: could not load firmware, %d\n", DEVNAME(sc), err);
>
> Which seems odd. I would expect tsleep to return EWOULDBLOCK only when
> the interrupt handler did not already set uc_intr and call wakeup().
That suggests the timeout fires before the wakeup(9). You can check
that by using wakeup_n() and print how many thread have been awaken.
> However, here tsleep returns with EWOULDBLOCK, after the interrupt did
> occur and firmware has reported that it is alive, so both uc_intr and uc_ok
> are set. This only seems to happen during resume (in a task scheduled
> during DVACT_WAKEUP), but not during autoconf or regular ifconfig down/up.
That suggests 100msec might be too small. Did you try with a bigger
value? Or is there something special happening during DVACT_WAKEUP?
> Am I missing something? Am I adding splnet() in the wrong place?
The splnet() you're adding ensure (with the KERNEL_LOCK()) no wakeup(9)
will happen until you go to sleep. I believe that's what you want.
> Does this patch work reliably for anyone without the above change?
>
> diff c05ef66598a004c1e173a5d0fd4cdf5403f2ad99 /usr/src
> blob - 428a299e7a2d859aa68a934d4048d843be1835ce
> file + sys/dev/pci/if_iwx.c
> --- sys/dev/pci/if_iwx.c
> +++ sys/dev/pci/if_iwx.c
> @@ -3350,6 +3350,8 @@ iwx_load_firmware(struct iwx_softc *sc)
> struct iwx_fw_sects *fws;
> int err, w;
>
> + splassert(IPL_NET);
> +
> sc->sc_uc.uc_intr = 0;
>
> fws = &sc->sc_fw.fw_sects[IWX_UCODE_TYPE_REGULAR];
> @@ -3362,6 +3364,9 @@ iwx_load_firmware(struct iwx_softc *sc)
> /* wait for the firmware to load */
> for (w = 0; !sc->sc_uc.uc_intr && w < 10; w++) {
> err = tsleep_nsec(&sc->sc_uc, 0, "iwxuc", MSEC_TO_NSEC(100));
> + /* XXX This should not be needed, should it: */
> + if (err == EWOULDBLOCK && sc->sc_uc.uc_intr)
> + err = 0;
> }
> if (err || !sc->sc_uc.uc_ok)
> printf("%s: could not load firmware, %d\n", DEVNAME(sc), err);
> @@ -3466,7 +3471,7 @@ iwx_run_init_mvm_ucode(struct iwx_softc *sc, int readn
> struct iwx_init_extended_cfg_cmd init_cfg = {
> .init_flags = htole32(IWX_INIT_NVM),
> };
> - int err;
> + int err, s;
>
> if ((sc->sc_flags & IWX_FLAG_RFKILL) && !readnvm) {
> printf("%s: radio is disabled by hardware switch\n",
> @@ -3474,10 +3479,12 @@ iwx_run_init_mvm_ucode(struct iwx_softc *sc, int readn
> return EPERM;
> }
>
> + s = splnet();
> sc->sc_init_complete = 0;
> err = iwx_load_ucode_wait_alive(sc);
> if (err) {
> printf("%s: failed to load init firmware\n", DEVNAME(sc));
> + splx(s);
> return err;
> }
>
> @@ -3487,22 +3494,28 @@ iwx_run_init_mvm_ucode(struct iwx_softc *sc, int readn
> */
> err = iwx_send_cmd_pdu(sc, IWX_WIDE_ID(IWX_SYSTEM_GROUP,
> IWX_INIT_EXTENDED_CFG_CMD), 0, sizeof(init_cfg), &init_cfg);
> - if (err)
> + if (err) {
> + splx(s);
> return err;
> + }
>
> err = iwx_send_cmd_pdu(sc, IWX_WIDE_ID(IWX_REGULATORY_AND_NVM_GROUP,
> IWX_NVM_ACCESS_COMPLETE), 0, sizeof(nvm_complete), &nvm_complete);
> - if (err)
> + if (err) {
> + splx(s);
> return err;
> + }
>
> /* Wait for the init complete notification from the firmware. */
> while ((sc->sc_init_complete & wait_flags) != wait_flags) {
> err = tsleep_nsec(&sc->sc_init_complete, 0, "iwxinit",
> SEC_TO_NSEC(2));
> - if (err)
> + if (err) {
> + splx(s);
> return err;
> + }
> }
> -
> + splx(s);
> if (readnvm) {
> err = iwx_nvm_get(sc);
> if (err) {
>