Re: panic in arptimer in r289937

2015-12-10 Thread Randall Stewart
Hans:

Though it would not hurt to add your patch, its not possible
for callout_reset() to return anything but 1 or 0.

Only callout_stop(), callout_drain(), callout_async_drain() can return -1.

So I don’t think that this will fix it.

R
On Dec 4, 2015, at 11:34 AM, Hans Petter Selasky  wrote:

 .


Randall Stewart
r...@netflix.com
803-317-4952





___
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"


Re: panic in arptimer in r289937

2015-12-10 Thread Hans Petter Selasky

Hi,

Here is the backtrace for a reproducable panic seen with arptimer():


#0  doadump (textdump=0) at pcpu.h:221
#1  0x80385afb in db_dump (dummy=, dummy2=false, 
dummy3=0, dummy4=0x0) at /usr/img/freebsd/sys/ddb/db_command.c:533
#2  0x803858ee in db_command (cmd_table=0x0) at 
/usr/img/freebsd/sys/ddb/db_command.c:440
#3  0x80385684 in db_command_loop () at 
/usr/img/freebsd/sys/ddb/db_command.c:493
#4  0x8038818b in db_trap (type=, code=0) at 
/usr/img/freebsd/sys/ddb/db_main.c:251
#5  0x80ae0973 in kdb_trap (type=12, code=0, tf=) 
at /usr/img/freebsd/sys/kern/subr_kdb.c:654
#6  0x80f276f1 in trap_fatal (frame=0xfe00f59f4950, eva=) at /usr/img/freebsd/sys/amd64/amd64/trap.c:829
#7  0x80f27924 in trap_pfault (frame=0xfe00f59f4950, usermode=) at /usr/img/freebsd/sys/amd64/amd64/trap.c:684
#8  0x80f270de in trap (frame=0xfe00f59f4950) at 
/usr/img/freebsd/sys/amd64/amd64/trap.c:435
#9  0x80f0a347 in calltrap () at 
/usr/img/freebsd/sys/amd64/amd64/exception.S:234
#10 0x80be9e3d in arptimer (arg=0xf8011d0fda00) at atomic.h:184
#11 0x80ab54f1 in softclock_call_cc (c=0xf8011d0fdaa8, 
cc=0x81ccd600, direct=)
at /usr/img/freebsd/sys/kern/kern_timeout.c:832
#12 0x80ab5814 in softclock (arg=0x81ccd600) at 
/usr/img/freebsd/sys/kern/kern_timeout.c:921
#13 0x80a5d7f6 in intr_event_execute_handlers (p=, 
ie=0xf80003998b00) at /usr/img/freebsd/sys/kern/kern_intr.c:1262
#14 0x80a5de06 in ithread_loop (arg=0xf8000396cde0) at 
/usr/img/freebsd/sys/kern/kern_intr.c:1275
#15 0x80a5a87c in fork_exit (callout=0x80a5dd60 , 
arg=0xf8000396cde0, frame=0xfe00f59f4c00)
at /usr/img/freebsd/sys/kern/kern_fork.c:1011
#16 0x80f0a87e in fork_trampoline () at 
/usr/img/freebsd/sys/amd64/amd64/exception.S:609
#17 0x in ?? ()


(kgdb) print  ((struct llentry *)arg)[0]
$5 = {
  lle_next = {
le_next = 0x0,
le_prev = 0xf80069b5fa98
  },
  r_l3addr = {
addr4 = {
  s_addr = 1563742475
},
addr6 = {
  __u6_addr = {
__u6_addr8 = 0xf8011d0fda10 "\v�4]",
__u6_addr16 = 0xf8011d0fda10,
__u6_addr32 = 0xf8011d0fda10
  }
}
  },
  ll_addr = {
mac_aligned = 121984137371108,
mac16 = 0xf8011d0fda20,
mac8 = 0xf8011d0fda20 "�\035-��n"
  },
  r_flags = 1,
  r_skip_req = 1,
  spare1 = 0,
  lle_tbl = 0xf80005653300,
  lle_head = 0xf80069b5fa98,
  lle_free = 0x80bf2270 ,
  la_hold = 0x0,
  la_numheld = 0,
  la_expire = 12422,
  la_flags = 1,
  la_asked = 0,
  la_preempt = 5,
  ln_state = 2,
  ln_router = 0,
  ln_ntick = 0,
  lle_refcnt = 1,
  lle_chain = {
le_next = 0x0,
le_prev = 0x0
  },
  lle_timer = {
c_links = {
  le = {
le_next = 0x0,
le_prev = 0x81ccd718
  },
  sle = {
sle_next = 0x0
  },
  tqe = {
tqe_next = 0x0,
tqe_prev = 0x81ccd718
  }
},
c_time = 53354272998546,
c_precision = 268435437,
c_arg = 0xf8011d0fda00,
c_func = 0x80be9950 ,
c_lock = 0x0,
c_flags = 16,
c_cpu = 0
  },
  lle_lock = {
lock_object = {
  lo_name = 0x8144abf0 "lle",
  lo_flags = 90374144,
  lo_data = 0,
  lo_witness = 0x0
},
rw_lock = 1
  },
  req_mtx = {
lock_object = {
  lo_name = 0x8144abf4 "lle req",
  lo_flags = 16973824,
  lo_data = 0,
  lo_witness = 0x0
},
mtx_lock = 4
  }
}

(kgdb) print /x ((struct llentry *)arg)->lle_tbl[0]
$6 = {
  llt_link = {
sle_next = 0xf8004be51900
  },
  llt_af = 0x1d243aa0,
  llt_hsize = 0xf801,
  lle_head = 0xf800053e5330,
  llt_ifp = 0xd04,
  llt_lookup = 0x0,
  llt_alloc_entry = 0xfe0001a472f0,
  llt_delete_entry = 0xf800504a5100,
  llt_prefix_free = 0xf800503d8c88,
  llt_dump_entry = 0x0,
  llt_hash = 0x0,
  llt_match_prefix = 0x0,
  llt_free_entry = 0x0,
  llt_foreach_entry = 0x0,
  llt_link_entry = 0x0,
  llt_unlink_entry = 0x38e425e,
  llt_fill_sa_entry = 0x0,
  llt_free_tbl = 0xf8011d243ab0
}

It appears arptimer() was called after lltable_unlink_entry() was 
called, because la_flags does not have the LLE_LINKED bit set, which can 
happen!! If arptimer() is firing exactly when we call 
lltable_unlink_entry(), then arptimer() will refer to freed memory. Does 
the following patch make sense?



Index: netinet/if_ether.c
===
--- netinet/if_ether.c  (revision 291256)
+++ netinet/if_ether.c  (working copy)
@@ -185,7 +185,13 @@
LLE_WUNLOCK(lle);
return;
}
-   ifp = lle->lle_tbl->llt_ifp;
+   if (lle->la_flags & LLE_LINKED) {
+   ifp = lle->lle_tbl->llt_ifp;
+   } else {
+   /* XXX RACE entry has been freed */
+   llentry_free(lle);
+

Re: panic in arptimer in r289937

2015-12-10 Thread Hans Petter Selasky

On 12/10/15 16:25, Randall Stewart wrote:

For callout_stop/drain you get

-1 == Callout as already gone off or is not running (usually the latter) else 
the caller iks
   not using locking properly or did not lock and check the active() 
value (which would have returned not active so no stop
   would have been needed);
0 == we could not stop the callout, it was in process..
1 == it was stopped successfully


For callout_reset() you get

0 == it was started
1 == it was restarted.

There is no -1 since it is either started or restarted never left in a 
not-running state...



Hi,

Correct, though if the return values would be the same, it might 
simplify the callout API and manual page a bit:


/* return values for all callout_xxx() functions */
#define CALLOUT_RET_DRAINING0 /* callout cannot be stopped, need 
drain */

#define CALLOUT_RET_CANCELLED   1 /* callout was successfully stopped */
#define CALLOUT_RET_STOPPED -1 /* callout was already stopped */

Then callout_reset() would return -1, if it was started from the stopped 
state.


--HPS

___
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"


Re: panic in arptimer in r289937

2015-12-10 Thread Hans Petter Selasky

On 12/10/15 16:35, Randall Stewart wrote:

If you did that it would change the KPI a bit meaning lots
of thrashing in the code.



Hi,

There are only 5 consumers of the callout_reset() return code in the 
FreeBSD 11-current kernel from what I can see:


grep -r "= callout_reset" sys/ | wc -l
   5

Two of these are already using > 0 checks.

--HPS
___
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"


Re: panic in arptimer in r289937

2015-12-10 Thread Randall Stewart
For callout_stop/drain you get

-1 == Callout as already gone off or is not running (usually the latter) else 
the caller iks
  not using locking properly or did not lock and check the active() 
value (which would have returned not active so no stop
  would have been needed);
0 == we could not stop the callout, it was in process..
1 == it was stopped successfully 


For callout_reset() you get

0 == it was started
1 == it was restarted.

There is no -1 since it is either started or restarted never left in a 
not-running state...

R
On Dec 4, 2015, at 11:52 AM, Hans Petter Selasky  wrote:

> On 12/04/15 20:34, Hans Petter Selasky wrote:
>> Hi Adrian,
>> 
>> On 10/31/15 16:01, Alexander V. Chernikov wrote:
>>> 
>>> 
>>> 31.10.2015, 16:46, "Adrian Chadd" :
 On 31 October 2015 at 09:34, Alexander V. Chernikov
  wrote:
>  31.10.2015, 05:32, "Adrian Chadd" :
>>  Hiya,
>> 
>>  Here's a panic from arptimer:
>  Hi Adrian,
> 
>  As far as I see, line 205 in if_ether.c is IF_AFDATA_LOCK(ifp)
> which happens after LLE_WUNLOCK().
>  So, it looks like (pre-cached) ifp had been freed before locking
> ifdata.
>  Do you have any more details on that? (e.g. was some interface
> detached at that moment, is it reproducible, etc..)
> 
>  From a quick glance, potential use-after-free has been possible
> for quite a long time, but I wonder why it hasn't been observed before.
>  Probably lltable_free() changes might have triggered that.
> 
>  I'll take a deeper look on that and reply.
 
>> 
>> Observed on an idle box with projects/hps_head too:
>> 
>>> panic: bogus refcnt 0 on lle 0xf8016508ca00
>>> cpuid = 7
>>> KDB: stack backtrace:
>>> db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame
>>> 0xfe03e4e8c7e0
>>> vpanic() at vpanic+0x182/frame 0xfe03e4e8c860
>>> kassert_panic() at kassert_panic+0x126/frame 0xfe03e4e8c8d0
>>> llentry_free() at llentry_free+0x136/frame 0xfe03e4e8c900
>>> arptimer() at arptimer+0x20e/frame 0xfe03e4e8c950
>>> softclock_call_cc() at softclock_call_cc+0x170/frame 0xfe03e4e8c9c0
>>> softclock() at softclock+0x47/frame 0xfe03e4e8c9e0
>>> intr_event_execute_handlers() at
>>> intr_event_execute_handlers+0x96/frame 0xfe03e4e8ca20
>>> ithread_loop() at ithread_loop+0xa6/frame 0xfe03e4e8ca70
>>> fork_exit() at fork_exit+0x84/frame 0xfe03e4e8cab0
>>> fork_trampoline() at fork_trampoline+0xe/frame 0xfe03e4e8cab0
>>> --- trap 0, rip = 0, rsp = 0, rbp = 0 ---
>> 
>> Looks like callout_reset() must be examined too, and was missed by:
>> 
>> https://svnweb.freebsd.org/changeset/base/290805
>> 
>> Can you try the attached patch?
>> 
>> Randall: Can you fix this ASAP?
>> 
>> --HPS
>> 
> 
> Hi,
> 
> Randall:
> 
> I see for 11-current, callout_reset() doesn't return -1 when the callout is 
> stopped like with callout_stop(). Is this a bug or a feature? Why can't the 
> callout_reset() and callout_stop() functions use the same return values?
> 
> In nd6_llinfo_settimer_locked() the return value of both callout_reset() and 
> callout_stop() is checked for positive values, but not in the other places 
> mentioned by my patch.
> 
> --HPS


Randall Stewart
r...@netflix.com
803-317-4952





___
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"


Re: panic in arptimer in r289937

2015-12-10 Thread Randall Stewart
If you did that it would change the KPI a bit meaning lots
of thrashing in the code.

And on top of that you now would no longer return 0..

You would get 1 it was restarted or -1 it was not running but is now started.

Makes no sense to me sorry..

R
On Dec 10, 2015, at 7:35 AM, Hans Petter Selasky  wrote:

> On 12/10/15 16:25, Randall Stewart wrote:
>> For callout_stop/drain you get
>> 
>> -1 == Callout as already gone off or is not running (usually the latter) 
>> else the caller iks
>>   not using locking properly or did not lock and check the active() 
>> value (which would have returned not active so no stop
>>   would have been needed);
>> 0 == we could not stop the callout, it was in process..
>> 1 == it was stopped successfully
>> 
>> 
>> For callout_reset() you get
>> 
>> 0 == it was started
>> 1 == it was restarted.
>> 
>> There is no -1 since it is either started or restarted never left in a 
>> not-running state...
>> 
> 
> Hi,
> 
> Correct, though if the return values would be the same, it might simplify the 
> callout API and manual page a bit:
> 
> /* return values for all callout_xxx() functions */
> #define CALLOUT_RET_DRAINING0 /* callout cannot be stopped, need drain */
> #define CALLOUT_RET_CANCELLED   1 /* callout was successfully stopped */
> #define CALLOUT_RET_STOPPED -1 /* callout was already stopped */
> 
> Then callout_reset() would return -1, if it was started from the stopped 
> state.
> 
> --HPS


Randall Stewart
r...@netflix.com
803-317-4952





___
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"


Re: panic in arptimer in r289937

2015-12-04 Thread Hans Petter Selasky

On 12/04/15 20:34, Hans Petter Selasky wrote:

Hi Adrian,

On 10/31/15 16:01, Alexander V. Chernikov wrote:



31.10.2015, 16:46, "Adrian Chadd" :

On 31 October 2015 at 09:34, Alexander V. Chernikov
 wrote:

  31.10.2015, 05:32, "Adrian Chadd" :

  Hiya,

  Here's a panic from arptimer:

  Hi Adrian,

  As far as I see, line 205 in if_ether.c is IF_AFDATA_LOCK(ifp)
which happens after LLE_WUNLOCK().
  So, it looks like (pre-cached) ifp had been freed before locking
ifdata.
  Do you have any more details on that? (e.g. was some interface
detached at that moment, is it reproducible, etc..)

  From a quick glance, potential use-after-free has been possible
for quite a long time, but I wonder why it hasn't been observed before.
  Probably lltable_free() changes might have triggered that.

  I'll take a deeper look on that and reply.




Observed on an idle box with projects/hps_head too:


panic: bogus refcnt 0 on lle 0xf8016508ca00
cpuid = 7
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame
0xfe03e4e8c7e0
vpanic() at vpanic+0x182/frame 0xfe03e4e8c860
kassert_panic() at kassert_panic+0x126/frame 0xfe03e4e8c8d0
llentry_free() at llentry_free+0x136/frame 0xfe03e4e8c900
arptimer() at arptimer+0x20e/frame 0xfe03e4e8c950
softclock_call_cc() at softclock_call_cc+0x170/frame 0xfe03e4e8c9c0
softclock() at softclock+0x47/frame 0xfe03e4e8c9e0
intr_event_execute_handlers() at
intr_event_execute_handlers+0x96/frame 0xfe03e4e8ca20
ithread_loop() at ithread_loop+0xa6/frame 0xfe03e4e8ca70
fork_exit() at fork_exit+0x84/frame 0xfe03e4e8cab0
fork_trampoline() at fork_trampoline+0xe/frame 0xfe03e4e8cab0
--- trap 0, rip = 0, rsp = 0, rbp = 0 ---


Looks like callout_reset() must be examined too, and was missed by:

https://svnweb.freebsd.org/changeset/base/290805

Can you try the attached patch?

Randall: Can you fix this ASAP?

--HPS



Hi,

Randall:

I see for 11-current, callout_reset() doesn't return -1 when the callout 
is stopped like with callout_stop(). Is this a bug or a feature? Why 
can't the callout_reset() and callout_stop() functions use the same 
return values?


In nd6_llinfo_settimer_locked() the return value of both callout_reset() 
and callout_stop() is checked for positive values, but not in the other 
places mentioned by my patch.


--HPS
___
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"


Re: panic in arptimer in r289937

2015-12-04 Thread Hans Petter Selasky

Hi Adrian,

On 10/31/15 16:01, Alexander V. Chernikov wrote:



31.10.2015, 16:46, "Adrian Chadd" :

On 31 October 2015 at 09:34, Alexander V. Chernikov
 wrote:

  31.10.2015, 05:32, "Adrian Chadd" :

  Hiya,

  Here's a panic from arptimer:

  Hi Adrian,

  As far as I see, line 205 in if_ether.c is IF_AFDATA_LOCK(ifp) which happens 
after LLE_WUNLOCK().
  So, it looks like (pre-cached) ifp had been freed before locking ifdata.
  Do you have any more details on that? (e.g. was some interface detached at 
that moment, is it reproducible, etc..)

  From a quick glance, potential use-after-free has been possible for quite a 
long time, but I wonder why it hasn't been observed before.
  Probably lltable_free() changes might have triggered that.

  I'll take a deeper look on that and reply.




Observed on an idle box with projects/hps_head too:


panic: bogus refcnt 0 on lle 0xf8016508ca00
cpuid = 7
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfe03e4e8c7e0
vpanic() at vpanic+0x182/frame 0xfe03e4e8c860
kassert_panic() at kassert_panic+0x126/frame 0xfe03e4e8c8d0
llentry_free() at llentry_free+0x136/frame 0xfe03e4e8c900
arptimer() at arptimer+0x20e/frame 0xfe03e4e8c950
softclock_call_cc() at softclock_call_cc+0x170/frame 0xfe03e4e8c9c0
softclock() at softclock+0x47/frame 0xfe03e4e8c9e0
intr_event_execute_handlers() at intr_event_execute_handlers+0x96/frame 
0xfe03e4e8ca20
ithread_loop() at ithread_loop+0xa6/frame 0xfe03e4e8ca70
fork_exit() at fork_exit+0x84/frame 0xfe03e4e8cab0
fork_trampoline() at fork_trampoline+0xe/frame 0xfe03e4e8cab0
--- trap 0, rip = 0, rsp = 0, rbp = 0 ---


Looks like callout_reset() must be examined too, and was missed by:

https://svnweb.freebsd.org/changeset/base/290805

Can you try the attached patch?

Randall: Can you fix this ASAP?

--HPS
diff --git a/sys/dev/oce/oce_if.c b/sys/dev/oce/oce_if.c
index 826cd3c..1cca876 100644
--- a/sys/dev/oce/oce_if.c
+++ b/sys/dev/oce/oce_if.c
@@ -343,7 +343,7 @@ oce_attach(device_t dev)
 
 	callout_init(>timer, 1);
 	rc = callout_reset(>timer, 2 * hz, oce_local_timer, sc);
-	if (rc)
+	if (rc > 0)
 		goto stats_free;
 
 	return 0;
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index dfba0b2..0aec1c4 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -420,7 +420,7 @@ arpresolve_full(struct ifnet *ifp, int is_gw, int create, struct mbuf *m,
 		la->la_expire = time_uptime;
 		canceled = callout_reset(>lle_timer, hz * V_arpt_down,
 		arptimer, la);
-		if (canceled)
+		if (canceled > 0)
 			LLE_REMREF(la);
 		la->la_asked++;
 		LLE_WUNLOCK(la);
@@ -1084,7 +1084,7 @@ arp_mark_lle_reachable(struct llentry *la)
 		la->la_expire = time_uptime + V_arpt_keep;
 		canceled = callout_reset(>lle_timer,
 		hz * V_arpt_keep, arptimer, la);
-		if (canceled)
+		if (canceled > 0)
 			LLE_REMREF(la);
 	}
 	la->la_asked = 0;
___
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"

Re: panic in arptimer in r289937

2015-10-31 Thread Alexander V . Chernikov


31.10.2015, 05:32, "Adrian Chadd" :
> Hiya,
>
> Here's a panic from arptimer:
Hi Adrian,

As far as I see, line 205 in if_ether.c is IF_AFDATA_LOCK(ifp) which happens 
after LLE_WUNLOCK().
So, it looks like (pre-cached) ifp had been freed before locking ifdata.
Do you have any more details on that? (e.g. was some interface detached at that 
moment, is it reproducible, etc..)

From a quick glance, potential use-after-free has been possible for quite a 
long time, but I wonder why it hasn't been observed before.
Probably lltable_free() changes might have triggered that.

I'll take a deeper look on that and reply.

>
> (kgdb) bt
> #0 doadump (textdump=0) at pcpu.h:221
> #1 0x803666b6 in db_fncall (dummy1=,
> dummy2=, dummy3=,
> dummy4=) at
> /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_command.c:568
> #2 0x8036614e in db_command (cmd_table=0x0) at
> /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_command.c:440
> #3 0x80365ee4 in db_command_loop () at
> /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_command.c:493
> #4 0x8036897b in db_trap (type=, code=0)
> at /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_main.c:251
> #5 0x8096c0f3 in kdb_trap (type=9, code=0, tf= optimized out>) at
> /usr/home/adrian/work/freebsd/head/src/sys/kern/subr_kdb.c:654
> #6 0x80d34c81 in trap_fatal (frame=0xfe022815d7a0,
> eva=) at
> /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/trap.c:829
> #7 0x80d34951 in trap (frame=) at
> /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/trap.c:203
> #8 0x80d149f7 in calltrap () at
> /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/exception.S:234
> #9 0x8092c3fb in _rw_wlock_cookie (c=0xdeadc0dedeadc2de,
> file=0x81211b1f
> "/usr/home/adrian/work/freebsd/head/src/sys/netinet/if_ether.c",
> line=205)
> at /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_rwlock.c:261
> #10 0x80a2487f in arptimer (arg=0xf8005ecc4000) at
> /usr/home/adrian/work/freebsd/head/src/sys/netinet/if_ether.c:205
> #11 0x80944c24 in softclock_call_cc (c=0xf8005ecc40a8,
> cc=0x81b2d480, direct=0) at
> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_timeout.c:722
> #12 0x80944f87 in softclock (arg=) at
> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_timeout.c:851
> #13 0x808f7eb6 in intr_event_execute_handlers (p= optimized out>, ie=0xf800035a6600) at
> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_intr.c:1262
> #14 0x808f8546 in ithread_loop (arg=0xf800032c47c0) at
> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_intr.c:1275
> #15 0x808f57a4 in fork_exit (callout=0x808f84a0
> , arg=0xf800032c47c0, frame=0xfe022815dac0) at
> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_fork.c:1011
> #16 0x80d14f2e in fork_trampoline () at
> /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/exception.S:609
> #17 0x in ?? ()
> Current language: auto; currently minimal
>
> (kgdb) print *(struct llentry *)c_arg
> $2 = {lle_next = {le_next = 0x0, le_prev = 0xf8005e867dc8},
> r_l3addr = {addr4 = {s_addr = 16782508}, addr6 = {__u6_addr =
> {__u6_addr8 = 0xf8005ecc4010 "�\024", __u6_addr16 =
> 0xf8005ecc4010,
> __u6_addr32 = 0xf8005ecc4010}}}, ll_addr = {mac_aligned =
> 110869256150596, mac16 = 0xf8005ecc4020, mac8 = 0xf8005ecc4020
> "D\036���d"}, spare0 = 0, spare1 = 0, lle_tbl = 0xf8005e867e00,
>   lle_head = 0xf8005e867dc8, lle_free = 0x80a2c5d0
> , la_hold = 0x0, la_numheld = 0, la_expire =
> 2110, la_flags = 1, la_asked = 0, la_preempt = 5, ln_state = 0,
> ln_router = 0, ln_ntick = 0,
>   lle_refcnt = 1, lle_chain = {le_next = 0x0, le_prev = 0x0},
> lle_timer = {c_links = {le = {le_next = 0x0, le_prev =
> 0x81b2d588}, sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0,
> tqe_prev = 0x81b2d588}},
> c_time = 9066299815445, c_precision = 322122525000, c_arg =
> 0xf8005ecc4000, c_func = 0x80a246e0 , c_lock =
> 0x0, c_flags = 0, c_iflags = 144, c_cpu = 0}, lle_lock = {lock_object
> = {
>   lo_name = 0x8120fbce "lle", lo_flags = 90374144, lo_data
> = 0, lo_witness = 0xfeb53c80}, rw_lock = 1}}
>
> ..
>
> (kgdb) print *((struct llentry *)c_arg)->lle_tbl
> $4 = {llt_link = {sle_next = 0xdeadc0dedeadc0de}, llt_af = -559038242,
> llt_hsize = -559038242, lle_head = 0xdeadc0dedeadc0de, llt_ifp =
> 0xdeadc0dedeadc0de, llt_lookup = 0xdeadc0dedeadc0de,
>   llt_alloc_entry = 0xdeadc0dedeadc0de, llt_delete_entry =
> 0xdeadc0dedeadc0de, llt_prefix_free = 0xdeadc0dedeadc0de,
> llt_dump_entry = 0xdeadc0dedeadc0de, llt_hash = 0xdeadc0dedeadc0de,
> llt_match_prefix = 0xdeadc0dedeadc0de,
>   llt_free_entry = 0xdeadc0dedeadc0de, llt_foreach_entry =
> 0xdeadc0dedeadc0de, llt_link_entry = 0xdeadc0dedeadc0de,
> llt_unlink_entry = 0xdeadc0dedeadc0de, llt_fill_sa_entry =
> 

Re: panic in arptimer in r289937

2015-10-31 Thread Adrian Chadd
On 31 October 2015 at 09:34, Alexander V. Chernikov
 wrote:
>
>
> 31.10.2015, 05:32, "Adrian Chadd" :
>> Hiya,
>>
>> Here's a panic from arptimer:
> Hi Adrian,
>
> As far as I see, line 205 in if_ether.c is IF_AFDATA_LOCK(ifp) which happens 
> after LLE_WUNLOCK().
> So, it looks like (pre-cached) ifp had been freed before locking ifdata.
> Do you have any more details on that? (e.g. was some interface detached at 
> that moment, is it reproducible, etc..)
>
> From a quick glance, potential use-after-free has been possible for quite a 
> long time, but I wonder why it hasn't been observed before.
> Probably lltable_free() changes might have triggered that.
>
> I'll take a deeper look on that and reply.

Hiya!

Thanks for your quick response.

I mean, I use wifi, and ARPs can get lost / transmit can get delayed /
etc. I'm also testing through a MIPS CPU based bridge, so I'm also not
bridging at line rate. (The above is from one of the x86 laptops doing
the traffic test.) These are both reasons why I may be poking at a
path that you don't normally see. :)

I appreciate you taking a very quick look at this!

Thanks,



-adrian

>
>>
>> (kgdb) bt
>> #0 doadump (textdump=0) at pcpu.h:221
>> #1 0x803666b6 in db_fncall (dummy1=,
>> dummy2=, dummy3=,
>> dummy4=) at
>> /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_command.c:568
>> #2 0x8036614e in db_command (cmd_table=0x0) at
>> /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_command.c:440
>> #3 0x80365ee4 in db_command_loop () at
>> /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_command.c:493
>> #4 0x8036897b in db_trap (type=, code=0)
>> at /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_main.c:251
>> #5 0x8096c0f3 in kdb_trap (type=9, code=0, tf=> optimized out>) at
>> /usr/home/adrian/work/freebsd/head/src/sys/kern/subr_kdb.c:654
>> #6 0x80d34c81 in trap_fatal (frame=0xfe022815d7a0,
>> eva=) at
>> /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/trap.c:829
>> #7 0x80d34951 in trap (frame=) at
>> /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/trap.c:203
>> #8 0x80d149f7 in calltrap () at
>> /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/exception.S:234
>> #9 0x8092c3fb in _rw_wlock_cookie (c=0xdeadc0dedeadc2de,
>> file=0x81211b1f
>> "/usr/home/adrian/work/freebsd/head/src/sys/netinet/if_ether.c",
>> line=205)
>> at /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_rwlock.c:261
>> #10 0x80a2487f in arptimer (arg=0xf8005ecc4000) at
>> /usr/home/adrian/work/freebsd/head/src/sys/netinet/if_ether.c:205
>> #11 0x80944c24 in softclock_call_cc (c=0xf8005ecc40a8,
>> cc=0x81b2d480, direct=0) at
>> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_timeout.c:722
>> #12 0x80944f87 in softclock (arg=) at
>> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_timeout.c:851
>> #13 0x808f7eb6 in intr_event_execute_handlers (p=> optimized out>, ie=0xf800035a6600) at
>> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_intr.c:1262
>> #14 0x808f8546 in ithread_loop (arg=0xf800032c47c0) at
>> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_intr.c:1275
>> #15 0x808f57a4 in fork_exit (callout=0x808f84a0
>> , arg=0xf800032c47c0, frame=0xfe022815dac0) at
>> /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_fork.c:1011
>> #16 0x80d14f2e in fork_trampoline () at
>> /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/exception.S:609
>> #17 0x in ?? ()
>> Current language: auto; currently minimal
>>
>> (kgdb) print *(struct llentry *)c_arg
>> $2 = {lle_next = {le_next = 0x0, le_prev = 0xf8005e867dc8},
>> r_l3addr = {addr4 = {s_addr = 16782508}, addr6 = {__u6_addr =
>> {__u6_addr8 = 0xf8005ecc4010 "�\024", __u6_addr16 =
>> 0xf8005ecc4010,
>> __u6_addr32 = 0xf8005ecc4010}}}, ll_addr = {mac_aligned =
>> 110869256150596, mac16 = 0xf8005ecc4020, mac8 = 0xf8005ecc4020
>> "D\036���d"}, spare0 = 0, spare1 = 0, lle_tbl = 0xf8005e867e00,
>>   lle_head = 0xf8005e867dc8, lle_free = 0x80a2c5d0
>> , la_hold = 0x0, la_numheld = 0, la_expire =
>> 2110, la_flags = 1, la_asked = 0, la_preempt = 5, ln_state = 0,
>> ln_router = 0, ln_ntick = 0,
>>   lle_refcnt = 1, lle_chain = {le_next = 0x0, le_prev = 0x0},
>> lle_timer = {c_links = {le = {le_next = 0x0, le_prev =
>> 0x81b2d588}, sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0,
>> tqe_prev = 0x81b2d588}},
>> c_time = 9066299815445, c_precision = 322122525000, c_arg =
>> 0xf8005ecc4000, c_func = 0x80a246e0 , c_lock =
>> 0x0, c_flags = 0, c_iflags = 144, c_cpu = 0}, lle_lock = {lock_object
>> = {
>>   lo_name = 0x8120fbce "lle", lo_flags = 90374144, lo_data
>> = 0, lo_witness = 0xfeb53c80}, rw_lock = 1}}
>>
>> ..
>>
>> (kgdb) print *((struct llentry *)c_arg)->lle_tbl
>> $4 

Re: panic in arptimer in r289937

2015-10-31 Thread Alexander V . Chernikov


31.10.2015, 16:46, "Adrian Chadd" :
> On 31 October 2015 at 09:34, Alexander V. Chernikov
>  wrote:
>>  31.10.2015, 05:32, "Adrian Chadd" :
>>>  Hiya,
>>>
>>>  Here's a panic from arptimer:
>>  Hi Adrian,
>>
>>  As far as I see, line 205 in if_ether.c is IF_AFDATA_LOCK(ifp) which 
>> happens after LLE_WUNLOCK().
>>  So, it looks like (pre-cached) ifp had been freed before locking ifdata.
>>  Do you have any more details on that? (e.g. was some interface detached at 
>> that moment, is it reproducible, etc..)
>>
>>  From a quick glance, potential use-after-free has been possible for quite a 
>> long time, but I wonder why it hasn't been observed before.
>>  Probably lltable_free() changes might have triggered that.
>>
>>  I'll take a deeper look on that and reply.
>
> Hiya!
>
> Thanks for your quick response.
>
> I mean, I use wifi, and ARPs can get lost / transmit can get delayed /
> etc. I'm also testing through a MIPS CPU based bridge, so I'm also not
I remember that :)
> bridging at line rate. (The above is from one of the x86 laptops doing
> the traffic test.) These are both reasons why I may be poking at a
> path that you don't normally see. :)
Yup. So, once again, could you provide a bit more details? :)
Was it related with any interface being destroyed?
What was the test scenario (just bridging between interfaces?)
Is this reproducible?
>
> I appreciate you taking a very quick look at this!
>
> Thanks,
>
> -adrian
>
>>>  (kgdb) bt
>>>  #0 doadump (textdump=0) at pcpu.h:221
>>>  #1 0x803666b6 in db_fncall (dummy1=,
>>>  dummy2=, dummy3=,
>>>  dummy4=) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_command.c:568
>>>  #2 0x8036614e in db_command (cmd_table=0x0) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_command.c:440
>>>  #3 0x80365ee4 in db_command_loop () at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_command.c:493
>>>  #4 0x8036897b in db_trap (type=, code=0)
>>>  at /usr/home/adrian/work/freebsd/head/src/sys/ddb/db_main.c:251
>>>  #5 0x8096c0f3 in kdb_trap (type=9, code=0, tf=>>  optimized out>) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/kern/subr_kdb.c:654
>>>  #6 0x80d34c81 in trap_fatal (frame=0xfe022815d7a0,
>>>  eva=) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/trap.c:829
>>>  #7 0x80d34951 in trap (frame=) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/trap.c:203
>>>  #8 0x80d149f7 in calltrap () at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/exception.S:234
>>>  #9 0x8092c3fb in _rw_wlock_cookie (c=0xdeadc0dedeadc2de,
>>>  file=0x81211b1f
>>>  "/usr/home/adrian/work/freebsd/head/src/sys/netinet/if_ether.c",
>>>  line=205)
>>>  at /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_rwlock.c:261
>>>  #10 0x80a2487f in arptimer (arg=0xf8005ecc4000) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/netinet/if_ether.c:205
>>>  #11 0x80944c24 in softclock_call_cc (c=0xf8005ecc40a8,
>>>  cc=0x81b2d480, direct=0) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_timeout.c:722
>>>  #12 0x80944f87 in softclock (arg=) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_timeout.c:851
>>>  #13 0x808f7eb6 in intr_event_execute_handlers (p=>>  optimized out>, ie=0xf800035a6600) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_intr.c:1262
>>>  #14 0x808f8546 in ithread_loop (arg=0xf800032c47c0) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_intr.c:1275
>>>  #15 0x808f57a4 in fork_exit (callout=0x808f84a0
>>>  , arg=0xf800032c47c0, frame=0xfe022815dac0) at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/kern/kern_fork.c:1011
>>>  #16 0x80d14f2e in fork_trampoline () at
>>>  /usr/home/adrian/work/freebsd/head/src/sys/amd64/amd64/exception.S:609
>>>  #17 0x in ?? ()
>>>  Current language: auto; currently minimal
>>>
>>>  (kgdb) print *(struct llentry *)c_arg
>>>  $2 = {lle_next = {le_next = 0x0, le_prev = 0xf8005e867dc8},
>>>  r_l3addr = {addr4 = {s_addr = 16782508}, addr6 = {__u6_addr =
>>>  {__u6_addr8 = 0xf8005ecc4010 "�\024", __u6_addr16 =
>>>  0xf8005ecc4010,
>>>  __u6_addr32 = 0xf8005ecc4010}}}, ll_addr = {mac_aligned =
>>>  110869256150596, mac16 = 0xf8005ecc4020, mac8 = 0xf8005ecc4020
>>>  "D\036���d"}, spare0 = 0, spare1 = 0, lle_tbl = 0xf8005e867e00,
>>>    lle_head = 0xf8005e867dc8, lle_free = 0x80a2c5d0
>>>  , la_hold = 0x0, la_numheld = 0, la_expire =
>>>  2110, la_flags = 1, la_asked = 0, la_preempt = 5, ln_state = 0,
>>>  ln_router = 0, ln_ntick = 0,
>>>    lle_refcnt = 1, lle_chain = {le_next = 0x0, le_prev = 0x0},
>>>  lle_timer = {c_links = {le = {le_next = 0x0, le_prev =
>>>  0x81b2d588}, sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0,