Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-06-08 Thread sven falempin
On Mon, Jun 8, 2020 at 1:48 PM Stefan Sperling  wrote:
>
> On Fri, May 22, 2020 at 01:48:28PM -0400, sven falempin wrote:
> >  After a few days ... (free size too small  288 < 1024 /2 )
> >
> > Maybe this can help make the driver better.
> >
> > printf '%x\n' $((0x350+0xf7)) ; grep -A2 'if_iwx.c:515'  /tmp/iwx.dis
> > 447
> > /usr/src/sys/dev/pci/if_iwx.c:515
> >  447:   41 c7 86 28 2f 05 00movl   $0x0,0x52f28(%r14)
> >  44e:   00 00 00 00
> >
> > [0]-[current]-[~]
> > # cat -n /usr/src/sys/dev/pci/if_iwx.c | grep -C5 -E '  515'
> >510  /* free paging*/
> >511  for (i = 0; i < dram->paging_cnt; i++)
> >512  iwx_dma_contig_free(dram->paging);
> >513
> >514  free(dram->paging, M_DEVBUF, dram->paging_cnt *
> > sizeof(*dram->paging));
> >515  dram->paging_cnt = 0;
> >516  dram->paging = NULL;
> >517  }
> >518
> >519  int
> >520  iwx_get_num_sections(const struct iwx_fw_sects *fws, int start
>
> This should fix free with a wrong size in the error case, and avoids
> re-allocating a chunk of DMA memory (sc->ctxt_info_dma) every time the
> firmware gets loaded. Instead, this chunk is now allocated once at
> attach time. This seems to be the allocation that failed in your case.
>
> diff 66ecf2e2f524653126dce17a447a43b26ee90abb /usr/src
> blob - c3ca08c7a726326e37cda8645596a176051b6cf4
> file + sys/dev/pci/if_iwx.c
> --- sys/dev/pci/if_iwx.c
> +++ sys/dev/pci/if_iwx.c
> @@ -230,7 +230,7 @@ int iwx_alloc_fw_monitor_block(struct iwx_softc *, uin
>  intiwx_alloc_fw_monitor(struct iwx_softc *, uint8_t);
>  intiwx_apply_debug_destination(struct iwx_softc *);
>  intiwx_ctxt_info_init(struct iwx_softc *, const struct iwx_fw_sects *);
> -void   iwx_ctxt_info_free(struct iwx_softc *);
> +void   iwx_ctxt_info_free_fw_img(struct iwx_softc *);
>  void   iwx_ctxt_info_free_paging(struct iwx_softc *);
>  intiwx_init_fw_sec(struct iwx_softc *, const struct iwx_fw_sects *,
> struct iwx_context_info_dram *);
> @@ -535,52 +535,60 @@ iwx_init_fw_sec(struct iwx_softc *sc, const struct iwx
>  struct iwx_context_info_dram *ctxt_dram)
>  {
> struct iwx_self_init_dram *dram = &sc->init_dram;
> -   int i, ret, lmac_cnt, umac_cnt, paging_cnt;
> +   int i, ret, fw_cnt = 0;
>
> KASSERT(dram->paging == NULL);
>
> -   lmac_cnt = iwx_get_num_sections(fws, 0);
> +   dram->lmac_cnt = iwx_get_num_sections(fws, 0);
> /* add 1 due to separator */
> -   umac_cnt = iwx_get_num_sections(fws, lmac_cnt + 1);
> +   dram->umac_cnt = iwx_get_num_sections(fws, dram->lmac_cnt + 1);
> /* add 2 due to separators */
> -   paging_cnt = iwx_get_num_sections(fws, lmac_cnt + umac_cnt + 2);
> +   dram->paging_cnt = iwx_get_num_sections(fws,
> +   dram->lmac_cnt + dram->umac_cnt + 2);
>
> -   dram->fw = mallocarray(umac_cnt + lmac_cnt, sizeof(*dram->fw),
> -   M_DEVBUF,  M_ZERO | M_NOWAIT);
> -   if (!dram->fw)
> +   dram->fw = mallocarray(dram->umac_cnt + dram->lmac_cnt,
> +   sizeof(*dram->fw), M_DEVBUF,  M_ZERO | M_NOWAIT);
> +   if (!dram->fw) {
> +   printf("%s: could not allocate memory for firmware sections\n",
> +   DEVNAME(sc));
> return ENOMEM;
> -   dram->paging = mallocarray(paging_cnt, sizeof(*dram->paging),
> +   }
> +
> +   dram->paging = mallocarray(dram->paging_cnt, sizeof(*dram->paging),
> M_DEVBUF, M_ZERO | M_NOWAIT);
> -   if (!dram->paging)
> +   if (!dram->paging) {
> +   printf("%s: could not allocate memory for firmware paging\n",
> +   DEVNAME(sc));
> return ENOMEM;
> +   }
>
> /* initialize lmac sections */
> -   for (i = 0; i < lmac_cnt; i++) {
> +   for (i = 0; i < dram->lmac_cnt; i++) {
> ret = iwx_ctxt_info_alloc_dma(sc, &fws->fw_sect[i],
> -  &dram->fw[dram->fw_cnt]);
> +  &dram->fw[fw_cnt]);
> if (ret)
> return ret;
> ctxt_dram->lmac_img[i] =
> -   htole64(dram->fw[dram->fw_cnt].paddr);
> +   htole64(dram->fw[fw_cnt].paddr);
> DPRINTF(("%s: firmware LMAC section %d at 0x%llx size %lld\n", __func__, i,
> -   (unsigned long long)dram->fw[dram->fw_cnt].paddr,
> -   (unsigned long long)dram->fw[dram->fw_cnt].size));
> -   dram->fw_cnt++;
> +   (unsigned long long)dram->fw[fw_cnt].paddr,
> +   (unsigned long long)dram->fw[fw_cnt].size));
> +   fw_cnt++;
> }
>
> /* initialize umac sections */
> -   for (i = 0; i < umac_cnt; i++) {
> +   for (i = 0; i < dram->umac_cnt; i++) {
> /* access FW with +1 to make

Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-06-08 Thread Stefan Sperling
On Fri, May 22, 2020 at 01:48:28PM -0400, sven falempin wrote:
>  After a few days ... (free size too small  288 < 1024 /2 )
> 
> Maybe this can help make the driver better.
> 
> printf '%x\n' $((0x350+0xf7)) ; grep -A2 'if_iwx.c:515'  /tmp/iwx.dis
> 447
> /usr/src/sys/dev/pci/if_iwx.c:515
>  447:   41 c7 86 28 2f 05 00movl   $0x0,0x52f28(%r14)
>  44e:   00 00 00 00
> 
> [0]-[current]-[~]
> # cat -n /usr/src/sys/dev/pci/if_iwx.c | grep -C5 -E '  515'
>510  /* free paging*/
>511  for (i = 0; i < dram->paging_cnt; i++)
>512  iwx_dma_contig_free(dram->paging);
>513
>514  free(dram->paging, M_DEVBUF, dram->paging_cnt *
> sizeof(*dram->paging));
>515  dram->paging_cnt = 0;
>516  dram->paging = NULL;
>517  }
>518
>519  int
>520  iwx_get_num_sections(const struct iwx_fw_sects *fws, int start

This should fix free with a wrong size in the error case, and avoids
re-allocating a chunk of DMA memory (sc->ctxt_info_dma) every time the
firmware gets loaded. Instead, this chunk is now allocated once at
attach time. This seems to be the allocation that failed in your case.

diff 66ecf2e2f524653126dce17a447a43b26ee90abb /usr/src
blob - c3ca08c7a726326e37cda8645596a176051b6cf4
file + sys/dev/pci/if_iwx.c
--- sys/dev/pci/if_iwx.c
+++ sys/dev/pci/if_iwx.c
@@ -230,7 +230,7 @@ int iwx_alloc_fw_monitor_block(struct iwx_softc *, uin
 intiwx_alloc_fw_monitor(struct iwx_softc *, uint8_t);
 intiwx_apply_debug_destination(struct iwx_softc *);
 intiwx_ctxt_info_init(struct iwx_softc *, const struct iwx_fw_sects *);
-void   iwx_ctxt_info_free(struct iwx_softc *);
+void   iwx_ctxt_info_free_fw_img(struct iwx_softc *);
 void   iwx_ctxt_info_free_paging(struct iwx_softc *);
 intiwx_init_fw_sec(struct iwx_softc *, const struct iwx_fw_sects *,
struct iwx_context_info_dram *);
@@ -535,52 +535,60 @@ iwx_init_fw_sec(struct iwx_softc *sc, const struct iwx
 struct iwx_context_info_dram *ctxt_dram)
 {
struct iwx_self_init_dram *dram = &sc->init_dram;
-   int i, ret, lmac_cnt, umac_cnt, paging_cnt;
+   int i, ret, fw_cnt = 0;
 
KASSERT(dram->paging == NULL);
 
-   lmac_cnt = iwx_get_num_sections(fws, 0);
+   dram->lmac_cnt = iwx_get_num_sections(fws, 0);
/* add 1 due to separator */
-   umac_cnt = iwx_get_num_sections(fws, lmac_cnt + 1);
+   dram->umac_cnt = iwx_get_num_sections(fws, dram->lmac_cnt + 1);
/* add 2 due to separators */
-   paging_cnt = iwx_get_num_sections(fws, lmac_cnt + umac_cnt + 2);
+   dram->paging_cnt = iwx_get_num_sections(fws,
+   dram->lmac_cnt + dram->umac_cnt + 2);
 
-   dram->fw = mallocarray(umac_cnt + lmac_cnt, sizeof(*dram->fw),
-   M_DEVBUF,  M_ZERO | M_NOWAIT);
-   if (!dram->fw)
+   dram->fw = mallocarray(dram->umac_cnt + dram->lmac_cnt,
+   sizeof(*dram->fw), M_DEVBUF,  M_ZERO | M_NOWAIT);
+   if (!dram->fw) {
+   printf("%s: could not allocate memory for firmware sections\n",
+   DEVNAME(sc));
return ENOMEM;
-   dram->paging = mallocarray(paging_cnt, sizeof(*dram->paging),
+   }
+
+   dram->paging = mallocarray(dram->paging_cnt, sizeof(*dram->paging),
M_DEVBUF, M_ZERO | M_NOWAIT);
-   if (!dram->paging)
+   if (!dram->paging) {
+   printf("%s: could not allocate memory for firmware paging\n",
+   DEVNAME(sc));
return ENOMEM;
+   }
 
/* initialize lmac sections */
-   for (i = 0; i < lmac_cnt; i++) {
+   for (i = 0; i < dram->lmac_cnt; i++) {
ret = iwx_ctxt_info_alloc_dma(sc, &fws->fw_sect[i],
-  &dram->fw[dram->fw_cnt]);
+  &dram->fw[fw_cnt]);
if (ret)
return ret;
ctxt_dram->lmac_img[i] =
-   htole64(dram->fw[dram->fw_cnt].paddr);
+   htole64(dram->fw[fw_cnt].paddr);
DPRINTF(("%s: firmware LMAC section %d at 0x%llx size %lld\n", 
__func__, i,
-   (unsigned long long)dram->fw[dram->fw_cnt].paddr,
-   (unsigned long long)dram->fw[dram->fw_cnt].size));
-   dram->fw_cnt++;
+   (unsigned long long)dram->fw[fw_cnt].paddr,
+   (unsigned long long)dram->fw[fw_cnt].size));
+   fw_cnt++;
}
 
/* initialize umac sections */
-   for (i = 0; i < umac_cnt; i++) {
+   for (i = 0; i < dram->umac_cnt; i++) {
/* access FW with +1 to make up for lmac separator */
ret = iwx_ctxt_info_alloc_dma(sc,
-   &fws->fw_sect[dram->fw_cnt + 1], &dram->fw[dram->fw_cnt]);
+   &fws->fw_sect[fw_cnt + 1], &dram->fw[fw_cnt]);
if (ret)
  

Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-16 Thread sven falempin
On Fri, May 15, 2020 at 11:17 AM Stefan Sperling  wrote:

> On Fri, May 15, 2020 at 11:11:44AM -0400, sven falempin wrote:
> > Index: if_iwx.c
> > ===
> > RCS file: /cvs/src/sys/dev/pci/if_iwx.c,v
> > retrieving revision 1.11
> > diff -u -p -r1.11 if_iwx.c
> > --- if_iwx.c29 Apr 2020 13:13:30 -  1.11
> > +++ if_iwx.c15 May 2020 15:08:45 -
> > @@ -3222,6 +3222,9 @@ iwx_run_init_mvm_ucode(struct iwx_softc
> >  * Send init config command to mark that we are sending NVM
> >  * access commands
> >  */
> > +   printf("%s: DELAYING\n", DEVNAME(sc));
> > +   DELAY(5000);
> > +
> > err = iwx_send_cmd_pdu(sc, IWX_WIDE_ID(IWX_SYSTEM_GROUP,
> > IWX_INIT_EXTENDED_CFG_CMD), 0, sizeof(init_cfg), &init_cfg);
> > if (err)
> >
> > Gave
> >
> > iwx0: DELAYING
> > iwx0: dumping device error log
> > iwx0: Start Error Log Dump:
> > iwx0: Status: 0x1, count: 6
> > iwx0: 0x0071 | NMI_INTERRUPT_UMAC_FATAL
> > iwx0: 0020A2F0 | trm_hw_status0
> > iwx0:  | trm_hw_status1
> > iwx0: 004FC308 | branchlink2
> > iwx0: 00016E5A | interruptlink1
> > iwx0: 00016E5A | interruptlink2
> > iwx0: 004F9F62 | data1
> > iwx0: 1000 | data2
> > iwx0: F008 | data3
> > iwx0:  | beacon time
> > iwx0: 000115E1 | tsf low
> > iwx0:  | tsf hi
> > iwx0:  | time gp1
> > iwx0: 000115E2 | time gp2
> > iwx0: 0001 | uCode revision type
> > iwx0: 002E | uCode version major
> > iwx0: 177B3E46 | uCode version minor
> > iwx0: 0340 | hw version
> > iwx0: 18889000 | board version
> > iwx0: 800AFD0C | hcmd
> > iwx0: 2002 | isr0
> > iwx0:  | isr1
> > iwx0: 18F2 | isr2
> > iwx0: 00CC | isr3
> > iwx0:  | isr4
> > iwx0:  | last cmd Id
> > iwx0: 004F9F62 | wait_event
> > iwx0:  | l2p_control
> > iwx0: 0020 | l2p_duration
> > iwx0:  | l2p_mhvalid
> > iwx0:  | l2p_addr_match
> > iwx0: 0009 | lmpm_pmg_sel
> > iwx0: 19071335 | timestamp
> > iwx0: 0828 | flow_handler
> > iwx0: Start UMAC Error Log Dump:
> > iwx0: Status: 0x1, count: 7
> > iwx0: 0x201010A3 | ADVANCED_SYSASSERT
> > iwx0: 0x | umac branchlink1
> > iwx0: 0xC008B1C0 | umac branchlink2
> > iwx0: 0xC0084E04 | umac interruptlink1
> > iwx0: 0x | umac interruptlink2
> > iwx0: 0x0002 | umac data1
> > iwx0: 0x0001 | umac data2
> > iwx0: 0xDEADBEEF | umac data3
> > iwx0: 0x002E | umac major
> > iwx0: 0x177B3E46 | umac minor
> > iwx0: 0x000115D2 | frame pointer
> > iwx0: 0xC0886C6C | stack pointer
> > iwx0: 0x00050C00 | last host cmd
> > iwx0: 0x | isr status reg
> > driver status:
> >   tx ring  0: qid=0  cur=6   queued=0
> >   tx ring  1: qid=1  cur=0   queued=0
> >   tx ring  2: qid=2  cur=0   queued=0
> >   tx ring  3: qid=3  cur=0   queued=0
> >   tx ring  4: qid=4  cur=0   queued=0
> >   tx ring  5: qid=5  cur=0   queued=0
> >   tx ring  6: qid=6  cur=0   queued=0
> >   tx ring  7: qid=7  cur=0   queued=0
> >   tx ring  8: qid=8  cur=0   queued=0
> >   tx ring  9: qid=9  cur=0   queued=0
> >   tx ring 10: qid=10 cur=0   queued=0
> >   tx ring 11: qid=11 cur=0   queued=0
> >   tx ring 12: qid=12 cur=0   queued=0
> >   tx ring 13: qid=13 cur=0   queued=0
> >   tx ring 14: qid=14 cur=0   queued=0
> >   tx ring 15: qid=15 cur=0   queued=0
> >   tx ring 16: qid=16 cur=0   queued=0
> >   tx ring 17: qid=17 cur=0   queued=0
> >   tx ring 18: qid=18 cur=0   queued=0
> >   tx ring 19: qid=19 cur=0   queued=0
> >   tx ring 20: qid=20 cur=0   queued=0
> >   tx ring 21: qid=21 cur=0   queued=0
> >   tx ring 22: qid=22 cur=0   queued=0
> >   tx ring 23: qid=23 cur=0   queued=0
> >   tx ring 24: qid=24 cur=0   queued=0
> >   tx ring 25: qid=25 cur=0   queued=0
> >   tx ring 26: qid=26 cur=0   queued=0
> >   tx ring 27: qid=27 cur=0   queued=0
> >   tx ring 28: qid=28 cur=0   queued=0
> >   tx ring 29: qid=29 cur=0   queued=0
> >   tx ring 30: qid=30 cur=0   queued=0
> >   rx ring: cur=265
> >   802.11 state INIT
> > iwx0: fatal firmware error
> >
> > I think the delay must be somewhere after.
>
> Ouch. Yes, looks like that's a bad spot.
>
> Though it is an interesting observation that waiting there for a long time
> causes another problem.
>
> > I know it s a bit silly , but would a test with
> >
> > #ifdef IWX_DEBUG
> > #define DPRINTF(x)  do { if (iwx_debug > 0) printf x; } while (0)
> > #define DPRINTFN(n, x)  do { if (iwx_debug >= (n)) printf x; } while (0)
> > int iwx_debug = 1;
> > #else
> > #define DPRINTF(x)  do { DELAY(10); } while (1)
> > #define DPRINTFN(n, x)  do { DELAY(10); } while (1)
> > #endif
> >
> > makes sense ?
>
> Not really. That just puts DELAY(10) in some arbitrary places.
> What we need to know is where exactly the driver is going too fast.
>

Current test.

# dmesg | grep fatal
iwx0: fatal firmware error 1
iwx0: fatal firmware error 1
iwx0: fatal firmware error 1
iwx0: fatal firmware error 1
iwx0: f

Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-15 Thread Stefan Sperling
On Fri, May 15, 2020 at 11:11:44AM -0400, sven falempin wrote:
> Index: if_iwx.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_iwx.c,v
> retrieving revision 1.11
> diff -u -p -r1.11 if_iwx.c
> --- if_iwx.c29 Apr 2020 13:13:30 -  1.11
> +++ if_iwx.c15 May 2020 15:08:45 -
> @@ -3222,6 +3222,9 @@ iwx_run_init_mvm_ucode(struct iwx_softc
>  * Send init config command to mark that we are sending NVM
>  * access commands
>  */
> +   printf("%s: DELAYING\n", DEVNAME(sc));
> +   DELAY(5000);
> +
> err = iwx_send_cmd_pdu(sc, IWX_WIDE_ID(IWX_SYSTEM_GROUP,
> IWX_INIT_EXTENDED_CFG_CMD), 0, sizeof(init_cfg), &init_cfg);
> if (err)
> 
> Gave
> 
> iwx0: DELAYING
> iwx0: dumping device error log
> iwx0: Start Error Log Dump:
> iwx0: Status: 0x1, count: 6
> iwx0: 0x0071 | NMI_INTERRUPT_UMAC_FATAL
> iwx0: 0020A2F0 | trm_hw_status0
> iwx0:  | trm_hw_status1
> iwx0: 004FC308 | branchlink2
> iwx0: 00016E5A | interruptlink1
> iwx0: 00016E5A | interruptlink2
> iwx0: 004F9F62 | data1
> iwx0: 1000 | data2
> iwx0: F008 | data3
> iwx0:  | beacon time
> iwx0: 000115E1 | tsf low
> iwx0:  | tsf hi
> iwx0:  | time gp1
> iwx0: 000115E2 | time gp2
> iwx0: 0001 | uCode revision type
> iwx0: 002E | uCode version major
> iwx0: 177B3E46 | uCode version minor
> iwx0: 0340 | hw version
> iwx0: 18889000 | board version
> iwx0: 800AFD0C | hcmd
> iwx0: 2002 | isr0
> iwx0:  | isr1
> iwx0: 18F2 | isr2
> iwx0: 00CC | isr3
> iwx0:  | isr4
> iwx0:  | last cmd Id
> iwx0: 004F9F62 | wait_event
> iwx0:  | l2p_control
> iwx0: 0020 | l2p_duration
> iwx0:  | l2p_mhvalid
> iwx0:  | l2p_addr_match
> iwx0: 0009 | lmpm_pmg_sel
> iwx0: 19071335 | timestamp
> iwx0: 0828 | flow_handler
> iwx0: Start UMAC Error Log Dump:
> iwx0: Status: 0x1, count: 7
> iwx0: 0x201010A3 | ADVANCED_SYSASSERT
> iwx0: 0x | umac branchlink1
> iwx0: 0xC008B1C0 | umac branchlink2
> iwx0: 0xC0084E04 | umac interruptlink1
> iwx0: 0x | umac interruptlink2
> iwx0: 0x0002 | umac data1
> iwx0: 0x0001 | umac data2
> iwx0: 0xDEADBEEF | umac data3
> iwx0: 0x002E | umac major
> iwx0: 0x177B3E46 | umac minor
> iwx0: 0x000115D2 | frame pointer
> iwx0: 0xC0886C6C | stack pointer
> iwx0: 0x00050C00 | last host cmd
> iwx0: 0x | isr status reg
> driver status:
>   tx ring  0: qid=0  cur=6   queued=0
>   tx ring  1: qid=1  cur=0   queued=0
>   tx ring  2: qid=2  cur=0   queued=0
>   tx ring  3: qid=3  cur=0   queued=0
>   tx ring  4: qid=4  cur=0   queued=0
>   tx ring  5: qid=5  cur=0   queued=0
>   tx ring  6: qid=6  cur=0   queued=0
>   tx ring  7: qid=7  cur=0   queued=0
>   tx ring  8: qid=8  cur=0   queued=0
>   tx ring  9: qid=9  cur=0   queued=0
>   tx ring 10: qid=10 cur=0   queued=0
>   tx ring 11: qid=11 cur=0   queued=0
>   tx ring 12: qid=12 cur=0   queued=0
>   tx ring 13: qid=13 cur=0   queued=0
>   tx ring 14: qid=14 cur=0   queued=0
>   tx ring 15: qid=15 cur=0   queued=0
>   tx ring 16: qid=16 cur=0   queued=0
>   tx ring 17: qid=17 cur=0   queued=0
>   tx ring 18: qid=18 cur=0   queued=0
>   tx ring 19: qid=19 cur=0   queued=0
>   tx ring 20: qid=20 cur=0   queued=0
>   tx ring 21: qid=21 cur=0   queued=0
>   tx ring 22: qid=22 cur=0   queued=0
>   tx ring 23: qid=23 cur=0   queued=0
>   tx ring 24: qid=24 cur=0   queued=0
>   tx ring 25: qid=25 cur=0   queued=0
>   tx ring 26: qid=26 cur=0   queued=0
>   tx ring 27: qid=27 cur=0   queued=0
>   tx ring 28: qid=28 cur=0   queued=0
>   tx ring 29: qid=29 cur=0   queued=0
>   tx ring 30: qid=30 cur=0   queued=0
>   rx ring: cur=265
>   802.11 state INIT
> iwx0: fatal firmware error
> 
> I think the delay must be somewhere after.

Ouch. Yes, looks like that's a bad spot.

Though it is an interesting observation that waiting there for a long time
causes another problem.

> I know it's a bit silly, but would a test with
> 
> #ifdef IWX_DEBUG
> #define DPRINTF(x)  do { if (iwx_debug > 0) printf x; } while (0)
> #define DPRINTFN(n, x)  do { if (iwx_debug >= (n)) printf x; } while (0)
> int iwx_debug = 1;
> #else
> #define DPRINTF(x)  do { DELAY(10); } while (1)
> #define DPRINTFN(n, x)  do { DELAY(10); } while (1)
> #endif
> 
> make sense?

Not really. That just puts DELAY(10) in some arbitrary places.
What we need to know is where exactly the driver is going too fast.



Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-15 Thread sven falempin
On Thu, May 14, 2020 at 5:55 AM Stefan Sperling  wrote:

> On Wed, May 13, 2020 at 07:55:02PM -0400, sven falempin wrote:
> > 'good news'
> >
> > I build a custom kernel with the DEBUG flag for the driver
>
> > I 'works' ,
>
> This means that the driver is doing something too fast on your hardware,
> and some miscommunication happens with the card as a result.
>
> One way to work around this is to add DELAY calls. It is not the ideal
> solution but would be a good first step to get the card working.
>
> Can you disable debugging again and try the patch below instead?
> If the problem re-appears, try to increase the amount of delay (up to 5000
> seems reasonable). If increasing the DELAY value does not help, try to move
> the DELAY call further down until it works.
>
> The DELAY may even need to be moved into the while loop inside
> iwx_nvm_init().
> But please try using the DELAY outside of a loop first.
>
> Finding the right spot might take some time. Welcome to driver development
> :)
>
> If you cannot find a spot for the DELAY that makes this work, then we will
> have to wait for someone else who is seeing the same problem and tries
> harder.
>
> diff 4a0fa473f5ea308b63ffd39645f73b2195291973 /usr/src
> blob - 64c3641a2d0d07a9d899c0b7ccdbe46d46e17b96
> file + sys/dev/pci/if_iwx.c
> --- sys/dev/pci/if_iwx.c
> +++ sys/dev/pci/if_iwx.c
> @@ -3222,6 +3222,7 @@ iwx_run_init_mvm_ucode(struct iwx_softc *sc, int readn
>  * Send init config command to mark that we are sending NVM
>  * access commands
>  */
> +   DELAY(1000);
> err = iwx_send_cmd_pdu(sc, IWX_WIDE_ID(IWX_SYSTEM_GROUP,
> IWX_INIT_EXTENDED_CFG_CMD), 0, sizeof(init_cfg), &init_cfg);
> if (err)
>


Index: if_iwx.c
===
RCS file: /cvs/src/sys/dev/pci/if_iwx.c,v
retrieving revision 1.11
diff -u -p -r1.11 if_iwx.c
--- if_iwx.c29 Apr 2020 13:13:30 -  1.11
+++ if_iwx.c15 May 2020 15:08:45 -
@@ -3222,6 +3222,9 @@ iwx_run_init_mvm_ucode(struct iwx_softc
 * Send init config command to mark that we are sending NVM
 * access commands
 */
+   printf("%s: DELAYING\n", DEVNAME(sc));
+   DELAY(5000);
+
err = iwx_send_cmd_pdu(sc, IWX_WIDE_ID(IWX_SYSTEM_GROUP,
IWX_INIT_EXTENDED_CFG_CMD), 0, sizeof(init_cfg), &init_cfg);
if (err)

Gave

iwx0: DELAYING
iwx0: dumping device error log
iwx0: Start Error Log Dump:
iwx0: Status: 0x1, count: 6
iwx0: 0x0071 | NMI_INTERRUPT_UMAC_FATAL
iwx0: 0020A2F0 | trm_hw_status0
iwx0:  | trm_hw_status1
iwx0: 004FC308 | branchlink2
iwx0: 00016E5A | interruptlink1
iwx0: 00016E5A | interruptlink2
iwx0: 004F9F62 | data1
iwx0: 1000 | data2
iwx0: F008 | data3
iwx0:  | beacon time
iwx0: 000115E1 | tsf low
iwx0:  | tsf hi
iwx0:  | time gp1
iwx0: 000115E2 | time gp2
iwx0: 0001 | uCode revision type
iwx0: 002E | uCode version major
iwx0: 177B3E46 | uCode version minor
iwx0: 0340 | hw version
iwx0: 18889000 | board version
iwx0: 800AFD0C | hcmd
iwx0: 2002 | isr0
iwx0:  | isr1
iwx0: 18F2 | isr2
iwx0: 00CC | isr3
iwx0:  | isr4
iwx0:  | last cmd Id
iwx0: 004F9F62 | wait_event
iwx0:  | l2p_control
iwx0: 0020 | l2p_duration
iwx0:  | l2p_mhvalid
iwx0:  | l2p_addr_match
iwx0: 0009 | lmpm_pmg_sel
iwx0: 19071335 | timestamp
iwx0: 0828 | flow_handler
iwx0: Start UMAC Error Log Dump:
iwx0: Status: 0x1, count: 7
iwx0: 0x201010A3 | ADVANCED_SYSASSERT
iwx0: 0x | umac branchlink1
iwx0: 0xC008B1C0 | umac branchlink2
iwx0: 0xC0084E04 | umac interruptlink1
iwx0: 0x | umac interruptlink2
iwx0: 0x0002 | umac data1
iwx0: 0x0001 | umac data2
iwx0: 0xDEADBEEF | umac data3
iwx0: 0x002E | umac major
iwx0: 0x177B3E46 | umac minor
iwx0: 0x000115D2 | frame pointer
iwx0: 0xC0886C6C | stack pointer
iwx0: 0x00050C00 | last host cmd
iwx0: 0x | isr status reg
driver status:
  tx ring  0: qid=0  cur=6   queued=0
  tx ring  1: qid=1  cur=0   queued=0
  tx ring  2: qid=2  cur=0   queued=0
  tx ring  3: qid=3  cur=0   queued=0
  tx ring  4: qid=4  cur=0   queued=0
  tx ring  5: qid=5  cur=0   queued=0
  tx ring  6: qid=6  cur=0   queued=0
  tx ring  7: qid=7  cur=0   queued=0
  tx ring  8: qid=8  cur=0   queued=0
  tx ring  9: qid=9  cur=0   queued=0
  tx ring 10: qid=10 cur=0   queued=0
  tx ring 11: qid=11 cur=0   queued=0
  tx ring 12: qid=12 cur=0   queued=0
  tx ring 13: qid=13 cur=0   queued=0
  tx ring 14: qid=14 cur=0   queued=0
  tx ring 15: qid=15 cur=0   queued=0
  tx ring 16: qid=16 cur=0   queued=0
  tx ring 17: qid=17 cur=0   queued=0
  tx ring 18: qid=18 cur=0   queued=0
  tx ring 19: qid=19 cur=0   queued=0
  tx ring 20: qid=20 cur=0   queued=0
  tx ring 21: qid=21 cur=0   queued=0
  tx ring 22: qid=22 cur=0   queued=0
  tx ring 23: qid=23 cur=0   queued=0
  tx ring 24: qid=24 cur=0   queued=0
  tx ring 25: qid=25

Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-14 Thread Stefan Sperling
On Wed, May 13, 2020 at 07:55:02PM -0400, sven falempin wrote:
> 'good news'
> 
> I build a custom kernel with the DEBUG flag for the driver

> I 'works' ,

This means that the driver is doing something too fast on your hardware,
and some miscommunication happens with the card as a result.

One way to work around this is to add DELAY calls. It is not the ideal
solution but would be a good first step to get the card working.

Can you disable debugging again and try the patch below instead?
If the problem re-appears, try to increase the amount of delay (up to 5000
seems reasonable). If increasing the DELAY value does not help, try to move
the DELAY call further down until it works.

The DELAY may even need to be moved into the while loop inside iwx_nvm_init().
But please try using the DELAY outside of a loop first.

Finding the right spot might take some time. Welcome to driver development :)

If you cannot find a spot for the DELAY that makes this work, then we will
have to wait for someone else who is seeing the same problem and tries harder.

diff 4a0fa473f5ea308b63ffd39645f73b2195291973 /usr/src
blob - 64c3641a2d0d07a9d899c0b7ccdbe46d46e17b96
file + sys/dev/pci/if_iwx.c
--- sys/dev/pci/if_iwx.c
+++ sys/dev/pci/if_iwx.c
@@ -3222,6 +3222,7 @@ iwx_run_init_mvm_ucode(struct iwx_softc *sc, int readn
 * Send init config command to mark that we are sending NVM
 * access commands
 */
+   DELAY(1000);
err = iwx_send_cmd_pdu(sc, IWX_WIDE_ID(IWX_SYSTEM_GROUP,
IWX_INIT_EXTENDED_CFG_CMD), 0, sizeof(init_cfg), &init_cfg);
if (err)



Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-13 Thread sven falempin
'good news'

I built a custom kernel with the DEBUG flag for the driver

ugen0 at uhub3 port 3 "Intel product 0x0029" rev 2.01/0.01 addr 2
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x203 done
iwx0: unexpected firmware response to command 0x203
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_cmd_done: command 0x88 done
iwx_send_cmd: sending command 0x88
iwx_cmd_done: command 0x88 done
iwx_send_cmd: sending command 0x88
iwx_cmd_done: command 0x88 done
iwx_send_cmd: sending command 0x88
iwx_cmd_done: command 0x88 done
iwx_send_cmd: sending command 0x88
iwx_cmd_done: command 0x88 done
iwx_send_cmd: sending command 0x88
iwx_cmd_done: command 0x88 done
iwx_send_cmd: sending command 0x88
iwx_cmd_done: command 0x88 done
iwx_send_cmd: sending command 0x88
iwx_cmd_done: command 0x88 done
iwx_send_cmd: sending command 0xc00
iwx_cmd_done: command 0xc00 done
iwx0: hw rev 0x340, fw ver 46.393952838.0, address f8:e4:e3:23:3c:46

It 'works',

Shall I log more, for example around the response here?

iwx_cmd_done: command 0x203 done
iwx0: unexpected firmware response to command 0x203

Scanning worked !

-- 
--
-
Knowing is not enough; we must apply. Willing is not enough; we must do



Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-13 Thread sven falempin
On Wed, May 13, 2020 at 2:24 PM Stuart Henderson  wrote:
>
> On 2020/05/13 13:46, sven falempin wrote:
> > *Please*
> > advise how to squeeze more information to thwart that problem.
>
> If I had a card using a newly developed driver that was doing that,
> I would remove the card, offer to send it to somebody working on the
> driver if they want it, and replace it with an alternative..
>

It is possible to send the M.2 wifi card to a developer.
I can also recompile a custom kernel, given broad guidance
on dumping `things`.

It is always good to have some testing outside the dev bench.
I will check some of the #if DEBUG blocks in the driver to see if I can
squeeze out more information.

-- 
--
-
Knowing is not enough; we must apply. Willing is not enough; we must do



Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-13 Thread Stuart Henderson
On 2020/05/13 13:46, sven falempin wrote:
> *Please*
> advise how to squeeze more information to thwart that problem.

If I had a card using a newly developed driver that was doing that,
I would remove the card, offer to send it to somebody working on the
driver if they want it, and replace it with an alternative..



Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-13 Thread sven falempin
>
> OpenBSD 6.7 (GENERIC.MP) #182: Thu May  7 11:11:58 MDT 2020
> dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> real mem = 7975399424 (7605MB)
> avail mem = 7721070592 (7363MB)
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 2.7 @ 0xebee0 (48 entries)
> bios0: vendor American Megatrends Inc. version "F2" date 06/20/2014
> bios0: Gigabyte Technology Co., Ltd. AM1M-S2H
> acpi0 at bios0: ACPI 5.0
> acpi0: sleep states S0 S3 S4 S5
> acpi0: tables DSDT FACP APIC FPDT MCFG HPET SSDT SSDT CRAT SSDT
> acpi0: wakeup devices BR11(S4) GPP0(S4) GPP1(S4) GBE_(S4) GPP2(S4) GPP3(S4) 
> SBAZ(S4) PS2K(S3) OHC1(S4) EHC1(S4) OHC2(S4) EHC2(S4) OHC3(S4) EHC3(S4) 
> XHC0(S4) PWRB(S3)
> acpitimer0 at acpi0: 3579545 Hz, 32 bits
> acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
> cpu0 at mainbus0: apid 0 (boot processor)
> cpu0: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.43 MHz, 16-00-01
> cpu0: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
> cpu0: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB 64b/line 
> 16-way L2 cache
> cpu0: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
> cpu0: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
> cpu0: smt 0, core 0, package 0
> mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
> cpu0: apic clock running at 99MHz
> cpu0: mwait min=64, max=64, IBE
> cpu1 at mainbus0: apid 1 (application processor)
> cpu1: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.16 MHz, 16-00-01
> cpu1: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
> cpu1: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB 64b/line 
> 16-way L2 cache
> cpu1: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
> cpu1: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
> cpu1: smt 0, core 1, package 0
> cpu2 at mainbus0: apid 2 (application processor)
> cpu2: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.16 MHz, 16-00-01
> cpu2: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
> cpu2: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB 64b/line 
> 16-way L2 cache
> cpu2: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
> cpu2: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
> cpu2: smt 0, core 2, package 0
> cpu3 at mainbus0: apid 3 (application processor)
> cpu3: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.16 MHz, 16-00-01
> cpu3: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
> cpu3: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB 64b/line 
> 16-way L2 cache
> cpu3: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
> cpu3: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
> cpu3: smt 0, core 3, package 0
> ioapic0 at mainbus0: apid 5 pa 0xfec00000, version 21, 24 pins
> ioapic1 at mainbus0: apid 6 pa 0xfec01000, version 21, 32 pins
> acpimcfg0 at acpi0
> acpimcfg0: addr 0xe0000000, bus 0-255
> acpihpet0 at acpi0: 14318180 Hz
> acpiprt0 at acpi0: bus 0 (PCI0)
> acpiprt1 at acpi0: bus -1 (BR11)
> acpiprt2 at acpi0: bus 1 (GPP0)
> acpiprt3 at acpi0: bus 2 (GPP1)
> acpiprt4 at acpi0: bus -1 (GPP2)
> acpiprt5 at acpi0: bus -1 (GPP3)
> acpicpu0 at acpi0: C2(0@400 io@0x414), C1(@1 halt!), PSS
> acpicpu1 at acpi0: C2(0@400 io@0x414), C1(@1 halt!), PSS
> acpicpu2 at acpi0: C2(0@400 io@0x414), C1(@1 halt!), PSS
> acpicpu3 at acpi0: C2(0@400 io@0x414), C1(@1 halt!), PSS
> acpipci0 at acpi0 PCI0: 0x00000010 0x00000011 0x00000000
> acpicmos0 at acpi0
> acpibtn0 at acpi0: PWRB
> cpu0: 2046 MHz: speeds: 2050 1850 1650 1400 1200 1000 800 MHz
> pci0 at mainbus0 bus 0
> pchb0 at pci0 dev 0 function 0 "AMD 16h Host" rev 0x00
> radeondrm0 at pci0 dev 1 function 0 "ATI Kabini" rev 0x00
> drm0 at radeondrm0
> radeondrm0: msi
> azalia0 at pci0 dev 1 function

Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-11 Thread sven falempin
On Mon, May 11, 2020 at 5:52 AM Stefan Sperling  wrote:

> On Sun, May 10, 2020 at 04:17:46PM -0400, sven falempin wrote:
> > On Sun, May 10, 2020 at 4:51 AM Stefan Sperling  wrote:
> >
> > > On Sat, May 09, 2020 at 04:23:08PM -0400, sven falempin wrote:
> > > > "no config, interface is down", Did not do anything special,
> > > > upgrade => Plug card => boot => crash
> > >
> > > > I tested with the intel firmware it does the same.
> > >
> > > I'm sorry, but there is really not enough information in your messages
> > > that would allow me to do anything other than just trying to somehow
> > > reproduce this problem by chance.
> > >
> >
> > I understand.
> >
> > there is nothing I did that is outside what I tell,
> > the problem is constant,
> > unavoidable
> > and requires 0 config
> > nor any command to enter.
>
> Yes, I believe what you are saying.
>
> The problem is that this error is not happening to me, and to diagnose it
> I need to see this same error happen on a machine I have in front of me.
> Once we reach that point, I can silently work on it until I find a fix.
> But before then, I cannot do anything. In order to try to replicate your
> setup as closely as possible, I need to know what your setup looks like.
>
> So, for example, knowing what hardware you have in front of you would be
> a good first step. But your report lacks a dmesg.
>
> Please follow the guidance given on https://www.openbsd.org/report.html
> Any bit of information that is requested there, if you can tell us about it,
> then please include it in your report. It will save us time in the long term.
>

I changed the PCI slot used, verified the USB power, and
removed the other PCI card (for a cleaner dmesg).

I also have two M.2 modules; both of them do the same :-(

Dmesg

OpenBSD 6.7 (GENERIC.MP) #182: Thu May  7 11:11:58 MDT 2020
dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 7975399424 (7605MB)
avail mem = 7721070592 (7363MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.7 @ 0xebee0 (48 entries)
bios0: vendor American Megatrends Inc. version "F2" date 06/20/2014
bios0: Gigabyte Technology Co., Ltd. AM1M-S2H
acpi0 at bios0: ACPI 5.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP APIC FPDT MCFG HPET SSDT SSDT CRAT SSDT
acpi0: wakeup devices BR11(S4) GPP0(S4) GPP1(S4) GBE_(S4) GPP2(S4) GPP3(S4)
SBAZ(S4) PS2K(S3) OHC1(S4) EHC1(S4) OHC2(S4) EHC2(S4) OHC3(S4) EHC3(S4)
XHC0(S4) PWRB(S3)
acpitimer0 at acpi0: 3579545 Hz, 32 bits
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.43 MHz, 16-00-01
cpu0:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
cpu0: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB
64b/line 16-way L2 cache
cpu0: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu0: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 99MHz
cpu0: mwait min=64, max=64, IBE
cpu1 at mainbus0: apid 1 (application processor)
cpu1: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.16 MHz, 16-00-01
cpu1:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
cpu1: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB
64b/line 16-way L2 cache
cpu1: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu1: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 2 (application processor)
cpu2: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.16 MHz, 16-00-01
cpu2:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
cpu2: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB
64b/line 16-way L2 cache
cpu2: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu2: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu2: smt 0, core 2, package 0
cpu3 at mainbus0: apid 3 (application processor)
cpu3: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.16 MHz, 16-00-01
cpu3

Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-11 Thread Stefan Sperling
On Sun, May 10, 2020 at 04:17:46PM -0400, sven falempin wrote:
> On Sun, May 10, 2020 at 4:51 AM Stefan Sperling  wrote:
> 
> > On Sat, May 09, 2020 at 04:23:08PM -0400, sven falempin wrote:
> > > "no config, interface is down", Did not do anything special,
> > > upgrade => Plug card => boot => crash
> >
> > > I tested with the intel firmware it does the same.
> >
> > I'm sorry, but there is really not enough information in your messages
> > that would allow me to do anything other than just trying to somehow
> > reproduce this problem by chance.
> >
> 
> I understand.
> 
> there is nothing I did that is outside what I tell,
> the problem is constant,
> unavoidable
> and requires 0 config
> nor any command to enter.

Yes, I believe what you are saying.

The problem is that this error is not happening to me, and to diagnose it
I need to see this same error happen on a machine I have in front of me.
Once we reach that point, I can silently work on it until I find a fix.
But before then, I cannot do anything. In order to try to replicate your
setup as closely as possible, I need to know what your setup looks like.

So, for example, knowing what hardware you have in front of you would be
a good first step. But your report lacks a dmesg.

Please follow the guidance given on https://www.openbsd.org/report.html
Any bit of information that is requested there, if you can tell us about it,
then please include it in your report. It will save us time in the long term.



Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-10 Thread sven falempin
On Sun, May 10, 2020 at 4:51 AM Stefan Sperling  wrote:

> On Sat, May 09, 2020 at 04:23:08PM -0400, sven falempin wrote:
> > "no config, interface is down", Did not do anything special,
> > upgrade => Plug card => boot => crash
>
> > I tested with the intel firmware it does the same.
>
> I'm sorry, but there is really not enough information in your messages
> that would allow me to do anything other than just trying to somehow
> reproduce this problem by chance.
>

I understand.

I did nothing beyond what I have described:
the problem is constant,
unavoidable,
and requires no configuration
nor any command to be entered.


-- 
--
-
Knowing is not enough; we must apply. Willing is not enough; we must do


Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-10 Thread Stefan Sperling
On Sat, May 09, 2020 at 04:23:08PM -0400, sven falempin wrote:
> "no config, interface is down", Did not do anything special,
> upgrade => Plug card => boot => crash

> I tested with the intel firmware it does the same.

I'm sorry, but there is really not enough information in your messages
that would allow me to do anything other than just trying to somehow
reproduce this problem by chance.



Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-09 Thread sven falempin
On Sat, May 9, 2020 at 4:14 AM Stefan Sperling  wrote:

> On Fri, May 08, 2020 at 11:51:50AM -0400, sven falempin wrote:
> > I upgraded to 6.7 - beta a tftp server i use
> >
> > Not much to report as the device is basic but i wanted to test some wifi on
> > it.
> >
> > iwx0 at pci8 dev 0 function 0 "Intel Wi-Fi 6 AX200" rev 0x1a, msix
> >
> > The firmware crashes at start,
>
> It looks like a failure of the NVM_ACCESS command:
>
> > iwx0: 0x00050088 | last host cmd
>
> #define IWX_NVM_ACCESS_CMD  0x88
>
> > no config down:
>
> What does "no config down" mean?
>
> If you could provide an exact sequence of steps anyone without prior knowledge
> could perform in order to repeat this problem, then I would take a look.
> Please don't assume that I already knew. I have never seen this error.
>

"no config, interface is down", Did not do anything special,
upgrade => Plug card => boot => crash

I tested with the intel firmware it does the same.

Full Dmesg :

OpenBSD 6.7 (GENERIC.MP) #182: Thu May  7 11:11:58 MDT 2020
dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 7975399424 (7605MB)
avail mem = 7721066496 (7363MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.7 @ 0xebee0 (48 entries)
bios0: vendor American Megatrends Inc. version "F2" date 06/20/2014
bios0: Gigabyte Technology Co., Ltd. AM1M-S2H
acpi0 at bios0: ACPI 5.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP APIC FPDT MCFG HPET SSDT SSDT CRAT SSDT
acpi0: wakeup devices BR11(S4) GPP0(S4) GPP1(S4) GBE_(S4) GPP2(S4) GPP3(S4)
SBAZ(S4) PS2K(S3) OHC1(S4) EHC1(S4) OHC2(S4) EHC2(S4) OHC3(S4) EHC3(S4)
XHC0(S4) PWRB(S3)
acpitimer0 at acpi0: 3579545 Hz, 32 bits
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.44 MHz, 16-00-01
cpu0:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
cpu0: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB
64b/line 16-way L2 cache
cpu0: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu0: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 99MHz
cpu0: mwait min=64, max=64, IBE
cpu1 at mainbus0: apid 1 (application processor)
cpu1: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.17 MHz, 16-00-01
cpu1:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
cpu1: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB
64b/line 16-way L2 cache
cpu1: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu1: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 2 (application processor)
cpu2: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.17 MHz, 16-00-01
cpu2:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
cpu2: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB
64b/line 16-way L2 cache
cpu2: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu2: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu2: smt 0, core 2, package 0
cpu3 at mainbus0: apid 3 (application processor)
cpu3: AMD Athlon(tm) 5350 APU with Radeon(tm) R3, 2046.17 MHz, 16-00-01
cpu3:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TOPEXT,DBKP,PCTRL3,ITSC,BMI1,XSAVEOPT
cpu3: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 2MB
64b/line 16-way L2 cache
cpu3: ITLB 32 4KB entries fully associative, 8 4MB entries fully associative
cpu3: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative
cpu3: smt 0, core 3, package 0
ioapic0 at mainbus0: apid 5 pa 0xfec00000, version 21, 24 pins
ioapic1 at mainbus0: apid 6 pa 0xfec01000, version 21, 32 pins
acpimcfg0 at acpi0
acpimcfg0: addr 0xe0000000, bus 0-255
acpihpet0 at acpi0: 

Re: 6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-09 Thread Stefan Sperling
On Fri, May 08, 2020 at 11:51:50AM -0400, sven falempin wrote:
> I upgraded to 6.7 - beta a tftp server i use
> 
> Not much to report as the device is basic but i wanted to test some wifi on
> it.
> 
> iwx0 at pci8 dev 0 function 0 "Intel Wi-Fi 6 AX200" rev 0x1a, msix
> 
> The firmware crashes at start,

It looks like a failure of the NVM_ACCESS command:

> iwx0: 0x00050088 | last host cmd

#define IWX_NVM_ACCESS_CMD  0x88

> no config down:

What does "no config down" mean?

If you could provide an exact sequence of steps anyone without prior knowledge
could perform in order to repeat this problem, then I would take a look.
Please don't assume that I already knew. I have never seen this error.



6.7 snaps upgrade went fine - Intel ax200ngw not so much

2020-05-08 Thread sven falempin
I upgraded a tftp server I use to 6.7-beta.

Not much to report, as the device is basic, but I wanted to test some wifi on
it.

iwx0 at pci8 dev 0 function 0 "Intel Wi-Fi 6 AX200" rev 0x1a, msix

The firmware crashes at start, with no config and the interface down:

iwx0: dumping device error log
iwx0: Start Error Log Dump:
iwx0: Status: 0x1, count: 6
iwx0: 0x0071 | NMI_INTERRUPT_UMAC_FATAL
iwx0: 002022F0 | trm_hw_status0
iwx0: 00000000 | trm_hw_status1
iwx0: 004FC308 | branchlink2
iwx0: 00016E5A | interruptlink1
iwx0: 00016E5A | interruptlink2
iwx0: 004F9F62 | data1
iwx0: 1000 | data2
iwx0: F008 | data3
iwx0: 00000000 | beacon time
iwx0: 00011B6F | tsf low
iwx0: 00000000 | tsf hi
iwx0: 00000000 | time gp1
iwx0: 00011B6F | time gp2
iwx0: 0001 | uCode revision type
iwx0: 002E | uCode version major
iwx0: 177B3E46 | uCode version minor
iwx0: 0340 | hw version
iwx0: 00889000 | board version
iwx0: 800AFD0C | hcmd
iwx0: 0002 | isr0
iwx0: 00000000 | isr1
iwx0: 18F2 | isr2
iwx0: 04CC | isr3
iwx0: 00000000 | isr4
iwx0: 00000000 | last cmd Id
iwx0: 004F9F62 | wait_event
iwx0: 00000000 | l2p_control
iwx0: 0020 | l2p_duration
iwx0: 00000000 | l2p_mhvalid
iwx0: 00000000 | l2p_addr_match
iwx0: 0009 | lmpm_pmg_sel
iwx0: 19071335 | timestamp
iwx0: 0024 | flow_handler
iwx0: Start UMAC Error Log Dump:
iwx0: Status: 0x1, count: 7
iwx0: 0x201010A3 | ADVANCED_SYSASSERT
iwx0: 0x00000000 | umac branchlink1
iwx0: 0xC008B1C0 | umac branchlink2
iwx0: 0xC0084E04 | umac interruptlink1
iwx0: 0x00000000 | umac interruptlink2
iwx0: 0x0002 | umac data1
iwx0: 0x0001 | umac data2
iwx0: 0xDEADBEEF | umac data3
iwx0: 0x002E | umac major
iwx0: 0x177B3E46 | umac minor
iwx0: 0x00011B60 | frame pointer
iwx0: 0xC0886C6C | stack pointer
iwx0: 0x00050088 | last host cmd
iwx0: 0x00000000 | isr status reg
driver status:
  tx ring  0: qid=0  cur=5   queued=0
  tx ring  1: qid=1  cur=0   queued=0
  tx ring  2: qid=2  cur=0   queued=0
  tx ring  3: qid=3  cur=0   queued=0
  tx ring  4: qid=4  cur=0   queued=0
  tx ring  5: qid=5  cur=0   queued=0
  tx ring  6: qid=6  cur=0   queued=0
  tx ring  7: qid=7  cur=0   queued=0
  tx ring  8: qid=8  cur=0   queued=0
  tx ring  9: qid=9  cur=0   queued=0
  tx ring 10: qid=10 cur=0   queued=0
  tx ring 11: qid=11 cur=0   queued=0
  tx ring 12: qid=12 cur=0   queued=0
  tx ring 13: qid=13 cur=0   queued=0
  tx ring 14: qid=14 cur=0   queued=0
  tx ring 15: qid=15 cur=0   queued=0
  tx ring 16: qid=16 cur=0   queued=0
  tx ring 17: qid=17 cur=0   queued=0
  tx ring 18: qid=18 cur=0   queued=0
  tx ring 19: qid=19 cur=0   queued=0
  tx ring 20: qid=20 cur=0   queued=0
  tx ring 21: qid=21 cur=0   queued=0
  tx ring 22: qid=22 cur=0   queued=0
  tx ring 23: qid=23 cur=0   queued=0
  tx ring 24: qid=24 cur=0   queued=0
  tx ring 25: qid=25 cur=0   queued=0
  tx ring 26: qid=26 cur=0   queued=0
  tx ring 27: qid=27 cur=0   queued=0
  tx ring 28: qid=28 cur=0   queued=0
  tx ring 29: qid=29 cur=0   queued=0
  tx ring 30: qid=30 cur=0   queued=0
  rx ring: cur=263
  802.11 state INIT
iwx0: fatal firmware error

Running "ifconfig iwx0 up" triggers the crash.

Could it be that the beta did not load the right firmware? Or is there some MAC
address restriction?

# strings /etc/firmware/iwx-cc-a0-46  | grep rel
release/core43_pv::177b3e46
# md5 /etc/firmware/iwx-cc-a0-46
MD5 (/etc/firmware/iwx-cc-a0-46) = babe453e0bc18ec93768ec6f002d8229

-- 
--
-
Knowing is not enough; we must apply. Willing is not enough; we must do