Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 06/23/2011 11:37 AM, Jan Kiszka wrote: > On 2011-06-20 19:07, Jan Kiszka wrote: >> On 2011-06-19 15:00, Gilles Chanteperdrix wrote: >>> On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: > I am working on this ppd cleanup issue again, I am asking for help to > find a fix in -head for all cases where the sys_ppd is needed during > some cleanup. > > The problem is that when the ppd cleanup is invoked: > - we have no guarantee that current is a thread from the Xenomai > application; > - if it is, current->mm is NULL. > > So, associating the sys_ppd to either current or current->mm does not > work. What we could do is pass the sys_ppd to all the other ppds cleanup > handlers, this would fix cases such as freeing mutexes fastlock, but > that does not help when the sys_ppd is needed during a thread deletion > hook. > > I would like to find a solution where simply calling xnsys_ppd_get() > will work, where we do not have an xnsys_ppd_get for each context, such > as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, > because it would be too error-prone. > > Any idea anyone? The best I could come up with: use a ptd to store the mm currently being cleaned up, so that xnshadow_ppd_get continues to work, even in the middle of a cleanup. >>> >>> In order to also get xnshadow_ppd_get to work in task deletion hooks >>> (which is needed to avoid the issue at the origin of this thread), we >>> also need to set this ptd upon shadow mapping, so it is still there >>> when reaching the task deletion hook (where current->mm may be NULL). 
>>> Hence the patch: >>> >>> diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c >>> index b243600..6bc4210 100644 >>> --- a/ksrc/nucleus/shadow.c >>> +++ b/ksrc/nucleus/shadow.c >>> @@ -65,6 +65,11 @@ int nkthrptd; >>> EXPORT_SYMBOL_GPL(nkthrptd); >>> int nkerrptd; >>> EXPORT_SYMBOL_GPL(nkerrptd); >>> +int nkmmptd; >>> +EXPORT_SYMBOL_GPL(nkmmptd); >>> + >>> +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) >>> +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) >> >> xnshadow_mm() can now return a no longer existing mm. So no user of >> xnshadow_mm should ever dereference that pointer. Thus we better change >> all that user to treat the return value as a void pointer e.g. >> >>> >>> struct xnskin_slot { >>> struct xnskin_props *props; >>> @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t >>> __user *u_completion, >>> * friends. >>> */ >>> xnshadow_thrptd(current) = thread; >>> + xnshadow_mmptd(current) = current->mm; >>> + >>> rthal_enable_notifier(current); >>> >>> if (xnthread_base_priority(thread) == 0 && >>> @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) >>> >>> static inline void do_cleanup_event(struct mm_struct *mm) >>> { >>> + struct task_struct *p = current; >>> + struct mm_struct *old; >>> + >>> + old = xnshadow_mm(p); >>> + xnshadow_mmptd(p) = mm; >>> + >>> ppd_remove_mm(mm, &detach_ppd); >>> + >>> + xnshadow_mmptd(p) = old; >> >> I don't have the full picture yet, but that feels racy: If the context >> over which we clean up that foreign mm is also using xnshadow_mmptd, >> other threads in that process may dislike this temporary change. 
>> >>> } >>> >>> RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); >>> @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); >>> xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) >>> { >>> if (xnpod_userspace_p()) >>> - return ppd_lookup(muxid, current->mm); >>> + return ppd_lookup(muxid, xnshadow_mm(current) ?: current->mm); >>> >>> return NULL; >>> } >>> @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) >>> sema_init(&completion_mutex, 1); >>> nkthrptd = rthal_alloc_ptdkey(); >>> nkerrptd = rthal_alloc_ptdkey(); >>> + nkmmptd = rthal_alloc_ptdkey(); >>> >>> - if (nkthrptd < 0 || nkerrptd < 0) { >>> + if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { >>> printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); >>> return -ENOMEM; >>> } >>> diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c >>> index 6ce75e5..cc86852 100644 >>> --- a/ksrc/skins/posix/mutex.c >>> +++ b/ksrc/skins/posix/mutex.c >>> @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, >>> xnlock_put_irqrestore(&nklock, s); >>> >>> #ifdef CONFIG_XENO_FASTSYNCH >>> - /* We call xnheap_free even if the mutex is not pshared; when >>> - this function is called from pse51_mutexq_cleanup, the >>> - sem_heap is destroyed, or not the one to which the fastlock >>> - belongs, xnheap will simply return an error. */ >> >> I think this comment is not completely obsolete. It still applies /wrt >> shared/non-shared. >> >>> xnheap_free(&xnsys_ppd_get(
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 06/23/2011 11:37 AM, Jan Kiszka wrote: > On 2011-06-20 19:07, Jan Kiszka wrote: >> On 2011-06-19 15:00, Gilles Chanteperdrix wrote: >>> On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: > I am working on this ppd cleanup issue again, I am asking for help to > find a fix in -head for all cases where the sys_ppd is needed during > some cleanup. > > The problem is that when the ppd cleanup is invoked: > - we have no guarantee that current is a thread from the Xenomai > application; > - if it is, current->mm is NULL. > > So, associating the sys_ppd to either current or current->mm does not > work. What we could do is pass the sys_ppd to all the other ppds cleanup > handlers, this would fix cases such as freeing mutexes fastlock, but > that does not help when the sys_ppd is needed during a thread deletion > hook. > > I would like to find a solution where simply calling xnsys_ppd_get() > will work, where we do not have an xnsys_ppd_get for each context, such > as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, > because it would be too error-prone. > > Any idea anyone? The best I could come up with: use a ptd to store the mm currently being cleaned up, so that xnshadow_ppd_get continues to work, even in the middle of a cleanup. >>> >>> In order to also get xnshadow_ppd_get to work in task deletion hooks >>> (which is needed to avoid the issue at the origin of this thread), we >>> also need to set this ptd upon shadow mapping, so it is still there >>> when reaching the task deletion hook (where current->mm may be NULL). 
>>> Hence the patch: >>> >>> diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c >>> index b243600..6bc4210 100644 >>> --- a/ksrc/nucleus/shadow.c >>> +++ b/ksrc/nucleus/shadow.c >>> @@ -65,6 +65,11 @@ int nkthrptd; >>> EXPORT_SYMBOL_GPL(nkthrptd); >>> int nkerrptd; >>> EXPORT_SYMBOL_GPL(nkerrptd); >>> +int nkmmptd; >>> +EXPORT_SYMBOL_GPL(nkmmptd); >>> + >>> +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) >>> +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) >> >> xnshadow_mm() can now return a no longer existing mm. So no user of >> xnshadow_mm should ever dereference that pointer. Thus we better change >> all that user to treat the return value as a void pointer e.g. >> >>> >>> struct xnskin_slot { >>> struct xnskin_props *props; >>> @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t >>> __user *u_completion, >>> * friends. >>> */ >>> xnshadow_thrptd(current) = thread; >>> + xnshadow_mmptd(current) = current->mm; >>> + >>> rthal_enable_notifier(current); >>> >>> if (xnthread_base_priority(thread) == 0 && >>> @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) >>> >>> static inline void do_cleanup_event(struct mm_struct *mm) >>> { >>> + struct task_struct *p = current; >>> + struct mm_struct *old; >>> + >>> + old = xnshadow_mm(p); >>> + xnshadow_mmptd(p) = mm; >>> + >>> ppd_remove_mm(mm, &detach_ppd); >>> + >>> + xnshadow_mmptd(p) = old; >> >> I don't have the full picture yet, but that feels racy: If the context >> over which we clean up that foreign mm is also using xnshadow_mmptd, >> other threads in that process may dislike this temporary change. 
>> >>> } >>> >>> RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); >>> @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); >>> xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) >>> { >>> if (xnpod_userspace_p()) >>> - return ppd_lookup(muxid, current->mm); >>> + return ppd_lookup(muxid, xnshadow_mm(current) ?: current->mm); >>> >>> return NULL; >>> } >>> @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) >>> sema_init(&completion_mutex, 1); >>> nkthrptd = rthal_alloc_ptdkey(); >>> nkerrptd = rthal_alloc_ptdkey(); >>> + nkmmptd = rthal_alloc_ptdkey(); >>> >>> - if (nkthrptd < 0 || nkerrptd < 0) { >>> + if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { >>> printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); >>> return -ENOMEM; >>> } >>> diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c >>> index 6ce75e5..cc86852 100644 >>> --- a/ksrc/skins/posix/mutex.c >>> +++ b/ksrc/skins/posix/mutex.c >>> @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, >>> xnlock_put_irqrestore(&nklock, s); >>> >>> #ifdef CONFIG_XENO_FASTSYNCH >>> - /* We call xnheap_free even if the mutex is not pshared; when >>> - this function is called from pse51_mutexq_cleanup, the >>> - sem_heap is destroyed, or not the one to which the fastlock >>> - belongs, xnheap will simply return an error. */ >> >> I think this comment is not completely obsolete. It still applies /wrt >> shared/non-shared. >> >>> xnheap_free(&xnsys_ppd_get(
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 06/23/2011 08:24 PM, Philippe Gerum wrote: > On Thu, 2011-06-23 at 20:13 +0200, Philippe Gerum wrote: >> On Thu, 2011-06-23 at 19:32 +0200, Gilles Chanteperdrix wrote: >>> On 06/23/2011 01:15 PM, Jan Kiszka wrote: On 2011-06-23 13:11, Gilles Chanteperdrix wrote: > On 06/23/2011 11:37 AM, Jan Kiszka wrote: >> On 2011-06-20 19:07, Jan Kiszka wrote: >>> On 2011-06-19 15:00, Gilles Chanteperdrix wrote: On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: > On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: >> I am working on this ppd cleanup issue again, I am asking for help to >> find a fix in -head for all cases where the sys_ppd is needed during >> some cleanup. >> >> The problem is that when the ppd cleanup is invoked: >> - we have no guarantee that current is a thread from the Xenomai >> application; >> - if it is, current->mm is NULL. >> >> So, associating the sys_ppd to either current or current->mm does not >> work. What we could do is pass the sys_ppd to all the other ppds >> cleanup >> handlers, this would fix cases such as freeing mutexes fastlock, but >> that does not help when the sys_ppd is needed during a thread >> deletion hook. >> >> I would like to find a solution where simply calling xnsys_ppd_get() >> will work, where we do not have an xnsys_ppd_get for each context, >> such >> as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, >> because it would be too error-prone. >> >> Any idea anyone? > > The best I could come up with: use a ptd to store the mm currently > being cleaned up, so that xnshadow_ppd_get continues to work, even > in the middle of a cleanup. In order to also get xnshadow_ppd_get to work in task deletion hooks (which is needed to avoid the issue at the origin of this thread), we also need to set this ptd upon shadow mapping, so it is still there when reaching the task deletion hook (where current->mm may be NULL). 
Hence the patch: diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c index b243600..6bc4210 100644 --- a/ksrc/nucleus/shadow.c +++ b/ksrc/nucleus/shadow.c @@ -65,6 +65,11 @@ int nkthrptd; EXPORT_SYMBOL_GPL(nkthrptd); int nkerrptd; EXPORT_SYMBOL_GPL(nkerrptd); +int nkmmptd; +EXPORT_SYMBOL_GPL(nkmmptd); + +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) >>> >>> xnshadow_mm() can now return a no longer existing mm. So no user of >>> xnshadow_mm should ever dereference that pointer. Thus we better change >>> all that user to treat the return value as a void pointer e.g. >>> struct xnskin_slot { struct xnskin_props *props; @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t __user *u_completion, * friends. */ xnshadow_thrptd(current) = thread; + xnshadow_mmptd(current) = current->mm; + rthal_enable_notifier(current); if (xnthread_base_priority(thread) == 0 && @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) static inline void do_cleanup_event(struct mm_struct *mm) { + struct task_struct *p = current; + struct mm_struct *old; + + old = xnshadow_mm(p); + xnshadow_mmptd(p) = mm; + ppd_remove_mm(mm, &detach_ppd); + + xnshadow_mmptd(p) = old; >>> >>> I don't have the full picture yet, but that feels racy: If the context >>> over which we clean up that foreign mm is also using xnshadow_mmptd, >>> other threads in that process may dislike this temporary change. 
>>> } RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) { if (xnpod_userspace_p()) - return ppd_lookup(muxid, current->mm); + return ppd_lookup(muxid, xnshadow_mm(current) ?: current->mm); return NULL; } @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) sema_init(&completion_mutex, 1); nkthrptd = rthal_alloc_ptdkey(); nkerrptd = rthal_alloc_ptdkey(); + nkmmptd = rthal_alloc_ptdkey(); - if (nkthrptd < 0 || nkerrptd < 0) { + i
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 06/23/2011 01:15 PM, Jan Kiszka wrote: > On 2011-06-23 13:11, Gilles Chanteperdrix wrote: >> On 06/23/2011 11:37 AM, Jan Kiszka wrote: >>> On 2011-06-20 19:07, Jan Kiszka wrote: On 2011-06-19 15:00, Gilles Chanteperdrix wrote: > On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: >> On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: >>> I am working on this ppd cleanup issue again, I am asking for help to >>> find a fix in -head for all cases where the sys_ppd is needed during >>> some cleanup. >>> >>> The problem is that when the ppd cleanup is invoked: >>> - we have no guarantee that current is a thread from the Xenomai >>> application; >>> - if it is, current->mm is NULL. >>> >>> So, associating the sys_ppd to either current or current->mm does not >>> work. What we could do is pass the sys_ppd to all the other ppds cleanup >>> handlers, this would fix cases such as freeing mutexes fastlock, but >>> that does not help when the sys_ppd is needed during a thread deletion >>> hook. >>> >>> I would like to find a solution where simply calling xnsys_ppd_get() >>> will work, where we do not have an xnsys_ppd_get for each context, such >>> as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, >>> because it would be too error-prone. >>> >>> Any idea anyone? >> >> The best I could come up with: use a ptd to store the mm currently >> being cleaned up, so that xnshadow_ppd_get continues to work, even >> in the middle of a cleanup. > > In order to also get xnshadow_ppd_get to work in task deletion hooks > (which is needed to avoid the issue at the origin of this thread), we > also need to set this ptd upon shadow mapping, so it is still there > when reaching the task deletion hook (where current->mm may be NULL). 
> Hence the patch: > > diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c > index b243600..6bc4210 100644 > --- a/ksrc/nucleus/shadow.c > +++ b/ksrc/nucleus/shadow.c > @@ -65,6 +65,11 @@ int nkthrptd; > EXPORT_SYMBOL_GPL(nkthrptd); > int nkerrptd; > EXPORT_SYMBOL_GPL(nkerrptd); > +int nkmmptd; > +EXPORT_SYMBOL_GPL(nkmmptd); > + > +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) > +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) xnshadow_mm() can now return a no longer existing mm. So no user of xnshadow_mm should ever dereference that pointer. Thus we better change all that user to treat the return value as a void pointer e.g. > > struct xnskin_slot { > struct xnskin_props *props; > @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t > __user *u_completion, >* friends. >*/ > xnshadow_thrptd(current) = thread; > + xnshadow_mmptd(current) = current->mm; > + > rthal_enable_notifier(current); > > if (xnthread_base_priority(thread) == 0 && > @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) > > static inline void do_cleanup_event(struct mm_struct *mm) > { > + struct task_struct *p = current; > + struct mm_struct *old; > + > + old = xnshadow_mm(p); > + xnshadow_mmptd(p) = mm; > + > ppd_remove_mm(mm, &detach_ppd); > + > + xnshadow_mmptd(p) = old; I don't have the full picture yet, but that feels racy: If the context over which we clean up that foreign mm is also using xnshadow_mmptd, other threads in that process may dislike this temporary change. 
> } > > RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); > @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); > xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) > { > if (xnpod_userspace_p()) > - return ppd_lookup(muxid, current->mm); > + return ppd_lookup(muxid, xnshadow_mm(current) ?: current->mm); > > return NULL; > } > @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) > sema_init(&completion_mutex, 1); > nkthrptd = rthal_alloc_ptdkey(); > nkerrptd = rthal_alloc_ptdkey(); > + nkmmptd = rthal_alloc_ptdkey(); > > - if (nkthrptd < 0 || nkerrptd < 0) { > + if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { > printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); > return -ENOMEM; > } > diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c > index 6ce75e5..cc86852 100644 > --- a/ksrc/skins/posix/mutex.c > +++ b/ksrc/skins/posix/mutex.c > @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t > *mutex, > xnlock_put_irqrestore(&nklock, s); > > #ifdef CONFIG_XENO_FASTSYNCH > - /* We call xnheap_free even if the mutex is not pshared; when > -this function is called from pse51_mutexq
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On Thu, 2011-06-23 at 19:32 +0200, Gilles Chanteperdrix wrote: > On 06/23/2011 01:15 PM, Jan Kiszka wrote: > > On 2011-06-23 13:11, Gilles Chanteperdrix wrote: > >> On 06/23/2011 11:37 AM, Jan Kiszka wrote: > >>> On 2011-06-20 19:07, Jan Kiszka wrote: > On 2011-06-19 15:00, Gilles Chanteperdrix wrote: > > On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: > >> On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: > >>> I am working on this ppd cleanup issue again, I am asking for help to > >>> find a fix in -head for all cases where the sys_ppd is needed during > >>> some cleanup. > >>> > >>> The problem is that when the ppd cleanup is invoked: > >>> - we have no guarantee that current is a thread from the Xenomai > >>> application; > >>> - if it is, current->mm is NULL. > >>> > >>> So, associating the sys_ppd to either current or current->mm does not > >>> work. What we could do is pass the sys_ppd to all the other ppds > >>> cleanup > >>> handlers, this would fix cases such as freeing mutexes fastlock, but > >>> that does not help when the sys_ppd is needed during a thread > >>> deletion hook. > >>> > >>> I would like to find a solution where simply calling xnsys_ppd_get() > >>> will work, where we do not have an xnsys_ppd_get for each context, > >>> such > >>> as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, > >>> because it would be too error-prone. > >>> > >>> Any idea anyone? > >> > >> The best I could come up with: use a ptd to store the mm currently > >> being cleaned up, so that xnshadow_ppd_get continues to work, even > >> in the middle of a cleanup. > > > > In order to also get xnshadow_ppd_get to work in task deletion hooks > > (which is needed to avoid the issue at the origin of this thread), we > > also need to set this ptd upon shadow mapping, so it is still there > > when reaching the task deletion hook (where current->mm may be NULL). 
> > Hence the patch: > > > > diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c > > index b243600..6bc4210 100644 > > --- a/ksrc/nucleus/shadow.c > > +++ b/ksrc/nucleus/shadow.c > > @@ -65,6 +65,11 @@ int nkthrptd; > > EXPORT_SYMBOL_GPL(nkthrptd); > > int nkerrptd; > > EXPORT_SYMBOL_GPL(nkerrptd); > > +int nkmmptd; > > +EXPORT_SYMBOL_GPL(nkmmptd); > > + > > +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) > > +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) > > xnshadow_mm() can now return a no longer existing mm. So no user of > xnshadow_mm should ever dereference that pointer. Thus we better change > all that user to treat the return value as a void pointer e.g. > > > > > struct xnskin_slot { > > struct xnskin_props *props; > > @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, > > xncompletion_t __user *u_completion, > > * friends. > > */ > > xnshadow_thrptd(current) = thread; > > + xnshadow_mmptd(current) = current->mm; > > + > > rthal_enable_notifier(current); > > > > if (xnthread_base_priority(thread) == 0 && > > @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) > > > > static inline void do_cleanup_event(struct mm_struct *mm) > > { > > + struct task_struct *p = current; > > + struct mm_struct *old; > > + > > + old = xnshadow_mm(p); > > + xnshadow_mmptd(p) = mm; > > + > > ppd_remove_mm(mm, &detach_ppd); > > + > > + xnshadow_mmptd(p) = old; > > I don't have the full picture yet, but that feels racy: If the context > over which we clean up that foreign mm is also using xnshadow_mmptd, > other threads in that process may dislike this temporary change. 
> > > } > > > > RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); > > @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); > > xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) > > { > > if (xnpod_userspace_p()) > > - return ppd_lookup(muxid, current->mm); > > + return ppd_lookup(muxid, xnshadow_mm(current) ?: > > current->mm); > > > > return NULL; > > } > > @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) > > sema_init(&completion_mutex, 1); > > nkthrptd = rthal_alloc_ptdkey(); > > nkerrptd = rthal_alloc_ptdkey(); > > + nkmmptd = rthal_alloc_ptdkey(); > > > > - if (nkthrptd < 0 || nkerrptd < 0) { > > + if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { > > printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); > > return -ENOMEM; > > } > >
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On Thu, 2011-06-23 at 20:13 +0200, Philippe Gerum wrote: > On Thu, 2011-06-23 at 19:32 +0200, Gilles Chanteperdrix wrote: > > On 06/23/2011 01:15 PM, Jan Kiszka wrote: > > > On 2011-06-23 13:11, Gilles Chanteperdrix wrote: > > >> On 06/23/2011 11:37 AM, Jan Kiszka wrote: > > >>> On 2011-06-20 19:07, Jan Kiszka wrote: > > On 2011-06-19 15:00, Gilles Chanteperdrix wrote: > > > On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: > > >> On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: > > >>> I am working on this ppd cleanup issue again, I am asking for help > > >>> to > > >>> find a fix in -head for all cases where the sys_ppd is needed during > > >>> some cleanup. > > >>> > > >>> The problem is that when the ppd cleanup is invoked: > > >>> - we have no guarantee that current is a thread from the Xenomai > > >>> application; > > >>> - if it is, current->mm is NULL. > > >>> > > >>> So, associating the sys_ppd to either current or current->mm does > > >>> not > > >>> work. What we could do is pass the sys_ppd to all the other ppds > > >>> cleanup > > >>> handlers, this would fix cases such as freeing mutexes fastlock, but > > >>> that does not help when the sys_ppd is needed during a thread > > >>> deletion hook. > > >>> > > >>> I would like to find a solution where simply calling xnsys_ppd_get() > > >>> will work, where we do not have an xnsys_ppd_get for each context, > > >>> such > > >>> as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, > > >>> because it would be too error-prone. > > >>> > > >>> Any idea anyone? > > >> > > >> The best I could come up with: use a ptd to store the mm currently > > >> being cleaned up, so that xnshadow_ppd_get continues to work, even > > >> in the middle of a cleanup. 
> > > > > > In order to also get xnshadow_ppd_get to work in task deletion hooks > > > (which is needed to avoid the issue at the origin of this thread), we > > > also need to set this ptd upon shadow mapping, so it is still there > > > when reaching the task deletion hook (where current->mm may be NULL). > > > Hence the patch: > > > > > > diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c > > > index b243600..6bc4210 100644 > > > --- a/ksrc/nucleus/shadow.c > > > +++ b/ksrc/nucleus/shadow.c > > > @@ -65,6 +65,11 @@ int nkthrptd; > > > EXPORT_SYMBOL_GPL(nkthrptd); > > > int nkerrptd; > > > EXPORT_SYMBOL_GPL(nkerrptd); > > > +int nkmmptd; > > > +EXPORT_SYMBOL_GPL(nkmmptd); > > > + > > > +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) > > > +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) > > > > xnshadow_mm() can now return a no longer existing mm. So no user of > > xnshadow_mm should ever dereference that pointer. Thus we better change > > all that user to treat the return value as a void pointer e.g. > > > > > > > > struct xnskin_slot { > > > struct xnskin_props *props; > > > @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, > > > xncompletion_t __user *u_completion, > > >* friends. 
> > >*/ > > > xnshadow_thrptd(current) = thread; > > > + xnshadow_mmptd(current) = current->mm; > > > + > > > rthal_enable_notifier(current); > > > > > > if (xnthread_base_priority(thread) == 0 && > > > @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) > > > > > > static inline void do_cleanup_event(struct mm_struct *mm) > > > { > > > + struct task_struct *p = current; > > > + struct mm_struct *old; > > > + > > > + old = xnshadow_mm(p); > > > + xnshadow_mmptd(p) = mm; > > > + > > > ppd_remove_mm(mm, &detach_ppd); > > > + > > > + xnshadow_mmptd(p) = old; > > > > I don't have the full picture yet, but that feels racy: If the context > > over which we clean up that foreign mm is also using xnshadow_mmptd, > > other threads in that process may dislike this temporary change. > > > > > } > > > > > > RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); > > > @@ -2925,7 +2940,7 @@ > > > EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); > > > xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) > > > { > > > if (xnpod_userspace_p()) > > > - return ppd_lookup(muxid, current->mm); > > > + return ppd_lookup(muxid, xnshadow_mm(current) ?: > > > current->mm); > > > > > > return NULL; > > > } > > > @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) > > > sema_init(&completion_mutex, 1); > > > nkthrptd = rthal_alloc_ptdkey(); > > > nkerrptd = rthal_alloc_ptdkey(); > > > + nkmmptd = rthal_alloc_ptdkey(); > > >>
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-06-23 13:11, Gilles Chanteperdrix wrote: > On 06/23/2011 11:37 AM, Jan Kiszka wrote: >> On 2011-06-20 19:07, Jan Kiszka wrote: >>> On 2011-06-19 15:00, Gilles Chanteperdrix wrote: On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: > On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: >> I am working on this ppd cleanup issue again, I am asking for help to >> find a fix in -head for all cases where the sys_ppd is needed during >> some cleanup. >> >> The problem is that when the ppd cleanup is invoked: >> - we have no guarantee that current is a thread from the Xenomai >> application; >> - if it is, current->mm is NULL. >> >> So, associating the sys_ppd to either current or current->mm does not >> work. What we could do is pass the sys_ppd to all the other ppds cleanup >> handlers, this would fix cases such as freeing mutexes fastlock, but >> that does not help when the sys_ppd is needed during a thread deletion >> hook. >> >> I would like to find a solution where simply calling xnsys_ppd_get() >> will work, where we do not have an xnsys_ppd_get for each context, such >> as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, >> because it would be too error-prone. >> >> Any idea anyone? > > The best I could come up with: use a ptd to store the mm currently > being cleaned up, so that xnshadow_ppd_get continues to work, even > in the middle of a cleanup. In order to also get xnshadow_ppd_get to work in task deletion hooks (which is needed to avoid the issue at the origin of this thread), we also need to set this ptd upon shadow mapping, so it is still there when reaching the task deletion hook (where current->mm may be NULL). 
Hence the patch: diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c index b243600..6bc4210 100644 --- a/ksrc/nucleus/shadow.c +++ b/ksrc/nucleus/shadow.c @@ -65,6 +65,11 @@ int nkthrptd; EXPORT_SYMBOL_GPL(nkthrptd); int nkerrptd; EXPORT_SYMBOL_GPL(nkerrptd); +int nkmmptd; +EXPORT_SYMBOL_GPL(nkmmptd); + +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) >>> >>> xnshadow_mm() can now return a no longer existing mm. So no user of >>> xnshadow_mm should ever dereference that pointer. Thus we better change >>> all that user to treat the return value as a void pointer e.g. >>> struct xnskin_slot { struct xnskin_props *props; @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t __user *u_completion, * friends. */ xnshadow_thrptd(current) = thread; + xnshadow_mmptd(current) = current->mm; + rthal_enable_notifier(current); if (xnthread_base_priority(thread) == 0 && @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) static inline void do_cleanup_event(struct mm_struct *mm) { + struct task_struct *p = current; + struct mm_struct *old; + + old = xnshadow_mm(p); + xnshadow_mmptd(p) = mm; + ppd_remove_mm(mm, &detach_ppd); + + xnshadow_mmptd(p) = old; >>> >>> I don't have the full picture yet, but that feels racy: If the context >>> over which we clean up that foreign mm is also using xnshadow_mmptd, >>> other threads in that process may dislike this temporary change. 
>>> } RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) { if (xnpod_userspace_p()) - return ppd_lookup(muxid, current->mm); + return ppd_lookup(muxid, xnshadow_mm(current) ?: current->mm); return NULL; } @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) sema_init(&completion_mutex, 1); nkthrptd = rthal_alloc_ptdkey(); nkerrptd = rthal_alloc_ptdkey(); + nkmmptd = rthal_alloc_ptdkey(); - if (nkthrptd < 0 || nkerrptd < 0) { + if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); return -ENOMEM; } diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c index 6ce75e5..cc86852 100644 --- a/ksrc/skins/posix/mutex.c +++ b/ksrc/skins/posix/mutex.c @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, xnlock_put_irqrestore(&nklock, s); #ifdef CONFIG_XENO_FASTSYNCH - /* We call xnheap_free even if the mutex is not pshared; when - this function is called from pse51_mutexq_cleanup, the - sem_heap is destroyed, or not the one to which the fastlock - belongs, xnheap will simply return an e
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 06/23/2011 11:37 AM, Jan Kiszka wrote: > On 2011-06-20 19:07, Jan Kiszka wrote: >> On 2011-06-19 15:00, Gilles Chanteperdrix wrote: >>> On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: > I am working on this ppd cleanup issue again, I am asking for help to > find a fix in -head for all cases where the sys_ppd is needed during > some cleanup. > > The problem is that when the ppd cleanup is invoked: > - we have no guarantee that current is a thread from the Xenomai > application; > - if it is, current->mm is NULL. > > So, associating the sys_ppd to either current or current->mm does not > work. What we could do is pass the sys_ppd to all the other ppds cleanup > handlers, this would fix cases such as freeing mutexes fastlock, but > that does not help when the sys_ppd is needed during a thread deletion > hook. > > I would like to find a solution where simply calling xnsys_ppd_get() > will work, where we do not have an xnsys_ppd_get for each context, such > as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, > because it would be too error-prone. > > Any idea anyone? The best I could come up with: use a ptd to store the mm currently being cleaned up, so that xnshadow_ppd_get continues to work, even in the middle of a cleanup. >>> >>> In order to also get xnshadow_ppd_get to work in task deletion hooks >>> (which is needed to avoid the issue at the origin of this thread), we >>> also need to set this ptd upon shadow mapping, so it is still there >>> when reaching the task deletion hook (where current->mm may be NULL). 
>>> Hence the patch: >>> >>> diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c >>> index b243600..6bc4210 100644 >>> --- a/ksrc/nucleus/shadow.c >>> +++ b/ksrc/nucleus/shadow.c >>> @@ -65,6 +65,11 @@ int nkthrptd; >>> EXPORT_SYMBOL_GPL(nkthrptd); >>> int nkerrptd; >>> EXPORT_SYMBOL_GPL(nkerrptd); >>> +int nkmmptd; >>> +EXPORT_SYMBOL_GPL(nkmmptd); >>> + >>> +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) >>> +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) >> >> xnshadow_mm() can now return a no longer existing mm. So no user of >> xnshadow_mm should ever dereference that pointer. Thus we better change >> all that user to treat the return value as a void pointer e.g. >> >>> >>> struct xnskin_slot { >>> struct xnskin_props *props; >>> @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t >>> __user *u_completion, >>> * friends. >>> */ >>> xnshadow_thrptd(current) = thread; >>> + xnshadow_mmptd(current) = current->mm; >>> + >>> rthal_enable_notifier(current); >>> >>> if (xnthread_base_priority(thread) == 0 && >>> @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) >>> >>> static inline void do_cleanup_event(struct mm_struct *mm) >>> { >>> + struct task_struct *p = current; >>> + struct mm_struct *old; >>> + >>> + old = xnshadow_mm(p); >>> + xnshadow_mmptd(p) = mm; >>> + >>> ppd_remove_mm(mm, &detach_ppd); >>> + >>> + xnshadow_mmptd(p) = old; >> >> I don't have the full picture yet, but that feels racy: If the context >> over which we clean up that foreign mm is also using xnshadow_mmptd, >> other threads in that process may dislike this temporary change. 
>> >>> } >>> >>> RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); >>> @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); >>> xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) >>> { >>> if (xnpod_userspace_p()) >>> - return ppd_lookup(muxid, current->mm); >>> + return ppd_lookup(muxid, xnshadow_mm(current) ?: current->mm); >>> >>> return NULL; >>> } >>> @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) >>> sema_init(&completion_mutex, 1); >>> nkthrptd = rthal_alloc_ptdkey(); >>> nkerrptd = rthal_alloc_ptdkey(); >>> + nkmmptd = rthal_alloc_ptdkey(); >>> >>> - if (nkthrptd < 0 || nkerrptd < 0) { >>> + if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { >>> printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); >>> return -ENOMEM; >>> } >>> diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c >>> index 6ce75e5..cc86852 100644 >>> --- a/ksrc/skins/posix/mutex.c >>> +++ b/ksrc/skins/posix/mutex.c >>> @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, >>> xnlock_put_irqrestore(&nklock, s); >>> >>> #ifdef CONFIG_XENO_FASTSYNCH >>> - /* We call xnheap_free even if the mutex is not pshared; when >>> - this function is called from pse51_mutexq_cleanup, the >>> - sem_heap is destroyed, or not the one to which the fastlock >>> - belongs, xnheap will simply return an error. */ >> >> I think this comment is not completely obsolete. It still applies /wrt >> shared/non-shared. >> >>> xnheap_free(&xnsys_ppd_get(
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-06-20 19:07, Jan Kiszka wrote: > On 2011-06-19 15:00, Gilles Chanteperdrix wrote: >> On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: >>> On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: I am working on this ppd cleanup issue again, I am asking for help to find a fix in -head for all cases where the sys_ppd is needed during some cleanup. The problem is that when the ppd cleanup is invoked: - we have no guarantee that current is a thread from the Xenomai application; - if it is, current->mm is NULL. So, associating the sys_ppd to either current or current->mm does not work. What we could do is pass the sys_ppd to all the other ppds cleanup handlers, this would fix cases such as freeing mutexes fastlock, but that does not help when the sys_ppd is needed during a thread deletion hook. I would like to find a solution where simply calling xnsys_ppd_get() will work, where we do not have an xnsys_ppd_get for each context, such as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, because it would be too error-prone. Any idea anyone? >>> >>> The best I could come up with: use a ptd to store the mm currently >>> being cleaned up, so that xnshadow_ppd_get continues to work, even >>> in the middle of a cleanup. >> >> In order to also get xnshadow_ppd_get to work in task deletion hooks >> (which is needed to avoid the issue at the origin of this thread), we >> also need to set this ptd upon shadow mapping, so it is still there >> when reaching the task deletion hook (where current->mm may be NULL). 
>> Hence the patch: >> >> diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c >> index b243600..6bc4210 100644 >> --- a/ksrc/nucleus/shadow.c >> +++ b/ksrc/nucleus/shadow.c >> @@ -65,6 +65,11 @@ int nkthrptd; >> EXPORT_SYMBOL_GPL(nkthrptd); >> int nkerrptd; >> EXPORT_SYMBOL_GPL(nkerrptd); >> +int nkmmptd; >> +EXPORT_SYMBOL_GPL(nkmmptd); >> + >> +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) >> +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) > > xnshadow_mm() can now return a no longer existing mm. So no user of > xnshadow_mm should ever dereference that pointer. Thus we better change > all that user to treat the return value as a void pointer e.g. > >> >> struct xnskin_slot { >> struct xnskin_props *props; >> @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t >> __user *u_completion, >> * friends. >> */ >> xnshadow_thrptd(current) = thread; >> +xnshadow_mmptd(current) = current->mm; >> + >> rthal_enable_notifier(current); >> >> if (xnthread_base_priority(thread) == 0 && >> @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) >> >> static inline void do_cleanup_event(struct mm_struct *mm) >> { >> +struct task_struct *p = current; >> +struct mm_struct *old; >> + >> +old = xnshadow_mm(p); >> +xnshadow_mmptd(p) = mm; >> + >> ppd_remove_mm(mm, &detach_ppd); >> + >> +xnshadow_mmptd(p) = old; > > I don't have the full picture yet, but that feels racy: If the context > over which we clean up that foreign mm is also using xnshadow_mmptd, > other threads in that process may dislike this temporary change. 
> >> } >> >> RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); >> @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); >> xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) >> { >> if (xnpod_userspace_p()) >> -return ppd_lookup(muxid, current->mm); >> +return ppd_lookup(muxid, xnshadow_mm(current) ?: current->mm); >> >> return NULL; >> } >> @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) >> sema_init(&completion_mutex, 1); >> nkthrptd = rthal_alloc_ptdkey(); >> nkerrptd = rthal_alloc_ptdkey(); >> +nkmmptd = rthal_alloc_ptdkey(); >> >> -if (nkthrptd < 0 || nkerrptd < 0) { >> +if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { >> printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); >> return -ENOMEM; >> } >> diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c >> index 6ce75e5..cc86852 100644 >> --- a/ksrc/skins/posix/mutex.c >> +++ b/ksrc/skins/posix/mutex.c >> @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, >> xnlock_put_irqrestore(&nklock, s); >> >> #ifdef CONFIG_XENO_FASTSYNCH >> -/* We call xnheap_free even if the mutex is not pshared; when >> - this function is called from pse51_mutexq_cleanup, the >> - sem_heap is destroyed, or not the one to which the fastlock >> - belongs, xnheap will simply return an error. */ > > I think this comment is not completely obsolete. It still applies /wrt > shared/non-shared. > >> xnheap_free(&xnsys_ppd_get(mutex->attr.pshared)->sem_heap, >> mutex->synchbase.fastlock); >> #endif /* CONFIG_XENO_FASTSYNCH */ >> >> > >
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-06-20 19:46, Gilles Chanteperdrix wrote: > On 06/20/2011 07:07 PM, Jan Kiszka wrote: >>> static inline void do_cleanup_event(struct mm_struct *mm) >>> { >>> + struct task_struct *p = current; >>> + struct mm_struct *old; >>> + >>> + old = xnshadow_mm(p); >>> + xnshadow_mmptd(p) = mm; >>> + >>> ppd_remove_mm(mm, &detach_ppd); >>> + >>> + xnshadow_mmptd(p) = old; >> >> I don't have the full picture yet, but that feels racy: If the context >> over which we clean up that foreign mm is also using xnshadow_mmptd, >> other threads in that process may dislike this temporary change. > > This mmptd is only used by xnshadow_ppd_get (which never dereferences it > by the way I know that the current code is safe. Avoiding mm_struct types is about keeping it safe in the future. Why track the type if its just an opaque key and should be handled as such? ). And if the current thread is running the cleanup event, it > is not running any other service at the same time. So, this is safe. Ah, xnshadow_mmptd is per-thread, not per-process as I incorrectly assumed. Then it's safe. > > An alternative implementation would be to use some global per-cpu > variable and disable the preemption during cleanup. But that would not > fix the case of the shadow cleanups where current->mm is already null. > > The remaining problem is an irq interrupting the cleanup, and using > xnshadow_ppd_get. But I would not expect that to happen. I mean, > xnshadow_ppd_get is more a service to be used for implementation of > system calls. Yes, that kind of usage would be a bug in the first place. 
> >>> diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c >>> index 6ce75e5..cc86852 100644 >>> --- a/ksrc/skins/posix/mutex.c >>> +++ b/ksrc/skins/posix/mutex.c >>> @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, >>> xnlock_put_irqrestore(&nklock, s); >>> >>> #ifdef CONFIG_XENO_FASTSYNCH >>> - /* We call xnheap_free even if the mutex is not pshared; when >>> - this function is called from pse51_mutexq_cleanup, the >>> - sem_heap is destroyed, or not the one to which the fastlock >>> - belongs, xnheap will simply return an error. */ >> >> I think this comment is not completely obsolete. It still applies /wrt >> shared/non-shared. > > If we apply this patch after the one changing the ppd cleanup order, > everything falls in place (I can even put a warning in xnheap_destroy if > the heap has some bytes still in use when destroyed). "We call xnheap_free even if the mutex is not pshared." This still applies and is unrelated to the ppd changes. Jan signature.asc Description: OpenPGP digital signature ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 06/20/2011 07:07 PM, Jan Kiszka wrote: >> static inline void do_cleanup_event(struct mm_struct *mm) >> { >> +struct task_struct *p = current; >> +struct mm_struct *old; >> + >> +old = xnshadow_mm(p); >> +xnshadow_mmptd(p) = mm; >> + >> ppd_remove_mm(mm, &detach_ppd); >> + >> +xnshadow_mmptd(p) = old; > > I don't have the full picture yet, but that feels racy: If the context > over which we clean up that foreign mm is also using xnshadow_mmptd, > other threads in that process may dislike this temporary change. This mmptd is only used by xnshadow_ppd_get (which never dereferences it by the way). And if the current thread is running the cleanup event, it is not running any other service at the same time. So, this is safe. An alternative implementation would be to use some global per-cpu variable and disable the preemption during cleanup. But that would not fix the case of the shadow cleanups where current->mm is already null. The remaining problem is an irq interrupting the cleanup, and using xnshadow_ppd_get. But I would not expect that to happen. I mean, xnshadow_ppd_get is more a service to be used for implementation of system calls. >> diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c >> index 6ce75e5..cc86852 100644 >> --- a/ksrc/skins/posix/mutex.c >> +++ b/ksrc/skins/posix/mutex.c >> @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, >> xnlock_put_irqrestore(&nklock, s); >> >> #ifdef CONFIG_XENO_FASTSYNCH >> -/* We call xnheap_free even if the mutex is not pshared; when >> - this function is called from pse51_mutexq_cleanup, the >> - sem_heap is destroyed, or not the one to which the fastlock >> - belongs, xnheap will simply return an error. */ > > I think this comment is not completely obsolete. It still applies /wrt > shared/non-shared. 
If we apply this patch after the one changing the ppd cleanup order, everything falls in place (I can even put a warning in xnheap_destroy if the heap has some bytes still in use when destroyed). > If we can resolve that potential race, this looks like a nice solution. It may look a bit overkill, since it uses an adeos ptd. On the other hand, who else uses them anyway ;-) -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-06-19 15:00, Gilles Chanteperdrix wrote: > On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: >> On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: >>> I am working on this ppd cleanup issue again, I am asking for help to >>> find a fix in -head for all cases where the sys_ppd is needed during >>> some cleanup. >>> >>> The problem is that when the ppd cleanup is invoked: >>> - we have no guarantee that current is a thread from the Xenomai >>> application; >>> - if it is, current->mm is NULL. >>> >>> So, associating the sys_ppd to either current or current->mm does not >>> work. What we could do is pass the sys_ppd to all the other ppds cleanup >>> handlers, this would fix cases such as freeing mutexes fastlock, but >>> that does not help when the sys_ppd is needed during a thread deletion hook. >>> >>> I would like to find a solution where simply calling xnsys_ppd_get() >>> will work, where we do not have an xnsys_ppd_get for each context, such >>> as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, >>> because it would be too error-prone. >>> >>> Any idea anyone? >> >> The best I could come up with: use a ptd to store the mm currently >> being cleaned up, so that xnshadow_ppd_get continues to work, even >> in the middle of a cleanup. > > In order to also get xnshadow_ppd_get to work in task deletion hooks > (which is needed to avoid the issue at the origin of this thread), we > also need to set this ptd upon shadow mapping, so it is still there > when reaching the task deletion hook (where current->mm may be NULL). 
> Hence the patch: > > diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c > index b243600..6bc4210 100644 > --- a/ksrc/nucleus/shadow.c > +++ b/ksrc/nucleus/shadow.c > @@ -65,6 +65,11 @@ int nkthrptd; > EXPORT_SYMBOL_GPL(nkthrptd); > int nkerrptd; > EXPORT_SYMBOL_GPL(nkerrptd); > +int nkmmptd; > +EXPORT_SYMBOL_GPL(nkmmptd); > + > +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) > +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) xnshadow_mm() can now return a no longer existing mm. So no user of xnshadow_mm should ever dereference that pointer. Thus we better change all that user to treat the return value as a void pointer e.g. > > struct xnskin_slot { > struct xnskin_props *props; > @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t > __user *u_completion, >* friends. >*/ > xnshadow_thrptd(current) = thread; > + xnshadow_mmptd(current) = current->mm; > + > rthal_enable_notifier(current); > > if (xnthread_base_priority(thread) == 0 && > @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) > > static inline void do_cleanup_event(struct mm_struct *mm) > { > + struct task_struct *p = current; > + struct mm_struct *old; > + > + old = xnshadow_mm(p); > + xnshadow_mmptd(p) = mm; > + > ppd_remove_mm(mm, &detach_ppd); > + > + xnshadow_mmptd(p) = old; I don't have the full picture yet, but that feels racy: If the context over which we clean up that foreign mm is also using xnshadow_mmptd, other threads in that process may dislike this temporary change. 
> } > > RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); > @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); > xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) > { > if (xnpod_userspace_p()) > - return ppd_lookup(muxid, current->mm); > + return ppd_lookup(muxid, xnshadow_mm(current) ?: current->mm); > > return NULL; > } > @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) > sema_init(&completion_mutex, 1); > nkthrptd = rthal_alloc_ptdkey(); > nkerrptd = rthal_alloc_ptdkey(); > + nkmmptd = rthal_alloc_ptdkey(); > > - if (nkthrptd < 0 || nkerrptd < 0) { > + if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { > printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); > return -ENOMEM; > } > diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c > index 6ce75e5..cc86852 100644 > --- a/ksrc/skins/posix/mutex.c > +++ b/ksrc/skins/posix/mutex.c > @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, > xnlock_put_irqrestore(&nklock, s); > > #ifdef CONFIG_XENO_FASTSYNCH > - /* We call xnheap_free even if the mutex is not pshared; when > -this function is called from pse51_mutexq_cleanup, the > -sem_heap is destroyed, or not the one to which the fastlock > -belongs, xnheap will simply return an error. */ I think this comment is not completely obsolete. It still applies /wrt shared/non-shared. > xnheap_free(&xnsys_ppd_get(mutex->attr.pshared)->sem_heap, > mutex->synchbase.fastlock); > #endif /* CONFIG_XENO_FASTSYNCH */ > > If we can resolve that potential race, this looks like a nice solution. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Cent
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 06/19/2011 01:17 PM, Gilles Chanteperdrix wrote: > On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: >> I am working on this ppd cleanup issue again, I am asking for help to >> find a fix in -head for all cases where the sys_ppd is needed during >> some cleanup. >> >> The problem is that when the ppd cleanup is invoked: >> - we have no guarantee that current is a thread from the Xenomai >> application; >> - if it is, current->mm is NULL. >> >> So, associating the sys_ppd to either current or current->mm does not >> work. What we could do is pass the sys_ppd to all the other ppds cleanup >> handlers, this would fix cases such as freeing mutexes fastlock, but >> that does not help when the sys_ppd is needed during a thread deletion hook. >> >> I would like to find a solution where simply calling xnsys_ppd_get() >> will work, where we do not have an xnsys_ppd_get for each context, such >> as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, >> because it would be too error-prone. >> >> Any idea anyone? > > The best I could come up with: use a ptd to store the mm currently > being cleaned up, so that xnshadow_ppd_get continues to work, even > in the middle of a cleanup. In order to also get xnshadow_ppd_get to work in task deletion hooks (which is needed to avoid the issue at the origin of this thread), we also need to set this ptd upon shadow mapping, so it is still there when reaching the task deletion hook (where current->mm may be NULL). 
Hence the patch: diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c index b243600..6bc4210 100644 --- a/ksrc/nucleus/shadow.c +++ b/ksrc/nucleus/shadow.c @@ -65,6 +65,11 @@ int nkthrptd; EXPORT_SYMBOL_GPL(nkthrptd); int nkerrptd; EXPORT_SYMBOL_GPL(nkerrptd); +int nkmmptd; +EXPORT_SYMBOL_GPL(nkmmptd); + +#define xnshadow_mmptd(t) ((t)->ptd[nkmmptd]) +#define xnshadow_mm(t) ((struct mm_struct *)xnshadow_mmptd(t)) struct xnskin_slot { struct xnskin_props *props; @@ -1304,6 +1309,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t __user *u_completion, * friends. */ xnshadow_thrptd(current) = thread; + xnshadow_mmptd(current) = current->mm; + rthal_enable_notifier(current); if (xnthread_base_priority(thread) == 0 && @@ -2759,7 +2766,15 @@ static void detach_ppd(xnshadow_ppd_t * ppd) static inline void do_cleanup_event(struct mm_struct *mm) { + struct task_struct *p = current; + struct mm_struct *old; + + old = xnshadow_mm(p); + xnshadow_mmptd(p) = mm; + ppd_remove_mm(mm, &detach_ppd); + + xnshadow_mmptd(p) = old; } RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); @@ -2925,7 +2940,7 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) { if (xnpod_userspace_p()) - return ppd_lookup(muxid, current->mm); + return ppd_lookup(muxid, xnshadow_mm(current) ?: current->mm); return NULL; } @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) sema_init(&completion_mutex, 1); nkthrptd = rthal_alloc_ptdkey(); nkerrptd = rthal_alloc_ptdkey(); + nkmmptd = rthal_alloc_ptdkey(); - if (nkthrptd < 0 || nkerrptd < 0) { + if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); return -ENOMEM; } diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c index 6ce75e5..cc86852 100644 --- a/ksrc/skins/posix/mutex.c +++ b/ksrc/skins/posix/mutex.c @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, xnlock_put_irqrestore(&nklock, s); #ifdef CONFIG_XENO_FASTSYNCH - 
/* We call xnheap_free even if the mutex is not pshared; when - this function is called from pse51_mutexq_cleanup, the - sem_heap is destroyed, or not the one to which the fastlock - belongs, xnheap will simply return an error. */ xnheap_free(&xnsys_ppd_get(mutex->attr.pshared)->sem_heap, mutex->synchbase.fastlock); #endif /* CONFIG_XENO_FASTSYNCH */ -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 06/19/2011 12:14 PM, Gilles Chanteperdrix wrote: > I am working on this ppd cleanup issue again, I am asking for help to > find a fix in -head for all cases where the sys_ppd is needed during > some cleanup. > > The problem is that when the ppd cleanup is invoked: > - we have no guarantee that current is a thread from the Xenomai > application; > - if it is, current->mm is NULL. > > So, associating the sys_ppd to either current or current->mm does not > work. What we could do is pass the sys_ppd to all the other ppds cleanup > handlers, this would fix cases such as freeing mutexes fastlock, but > that does not help when the sys_ppd is needed during a thread deletion hook. > > I would like to find a solution where simply calling xnsys_ppd_get() > will work, where we do not have an xnsys_ppd_get for each context, such > as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, > because it would be too error-prone. > > Any idea anyone? The best I could come up with: use a ptd to store the mm currently being cleaned up, so that xnshadow_ppd_get continues to work, even in the middle of a cleanup. 
diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c index b243600..b542b4f 100644 --- a/ksrc/nucleus/shadow.c +++ b/ksrc/nucleus/shadow.c @@ -65,6 +65,8 @@ int nkthrptd; EXPORT_SYMBOL_GPL(nkthrptd); int nkerrptd; EXPORT_SYMBOL_GPL(nkerrptd); +int nkmmptd; +EXPORT_SYMBOL_GPL(nkmmptd); struct xnskin_slot { struct xnskin_props *props; @@ -2759,7 +2761,14 @@ static void detach_ppd(xnshadow_ppd_t * ppd) static inline void do_cleanup_event(struct mm_struct *mm) { + struct mm_struct *old; + + old = (struct mm_struct *)(current->ptd[nkmmptd]); + current->ptd[nkmmptd] = mm; + ppd_remove_mm(mm, &detach_ppd); + + current->ptd[nkmmptd] = old; } RTHAL_DECLARE_CLEANUP_EVENT(cleanup_event); @@ -2924,8 +2933,14 @@ EXPORT_SYMBOL_GPL(xnshadow_unregister_interface); */ xnshadow_ppd_t *xnshadow_ppd_get(unsigned muxid) { - if (xnpod_userspace_p()) - return ppd_lookup(muxid, current->mm); + if (xnpod_userspace_p()) { + struct mm_struct *mm; + + mm = (struct mm_struct *)current->ptd[nkmmptd] ?: current->mm; + /* ptd is not null if we are currently cleaning up an mm */ + + return ppd_lookup(muxid, mm); + } return NULL; } @@ -2960,8 +2975,9 @@ int xnshadow_mount(void) sema_init(&completion_mutex, 1); nkthrptd = rthal_alloc_ptdkey(); nkerrptd = rthal_alloc_ptdkey(); + nkmmptd = rthal_alloc_ptdkey(); - if (nkthrptd < 0 || nkerrptd < 0) { + if (nkthrptd < 0 || nkerrptd < 0 || nkmmptd < 0) { printk(KERN_ERR "Xenomai: cannot allocate PTD slots\n"); return -ENOMEM; } diff --git a/ksrc/skins/posix/mutex.c b/ksrc/skins/posix/mutex.c index 6ce75e5..cc86852 100644 --- a/ksrc/skins/posix/mutex.c +++ b/ksrc/skins/posix/mutex.c @@ -219,10 +219,6 @@ void pse51_mutex_destroy_internal(pse51_mutex_t *mutex, xnlock_put_irqrestore(&nklock, s); #ifdef CONFIG_XENO_FASTSYNCH - /* We call xnheap_free even if the mutex is not pshared; when - this function is called from pse51_mutexq_cleanup, the - sem_heap is destroyed, or not the one to which the fastlock - belongs, xnheap will simply return an error. 
*/ xnheap_free(&xnsys_ppd_get(mutex->attr.pshared)->sem_heap, mutex->synchbase.fastlock); #endif /* CONFIG_XENO_FASTSYNCH */ -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/23/2011 03:53 PM, Jan Kiszka wrote: > The following changes since commit aec30a2543afa18fa7832deee85e187b0faeb1f0: > > xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 +0200) > > are available in the git repository at: > git://git.xenomai.org/xenomai-jki.git for-upstream > > Jan Kiszka (1): > native: Fix msendq fastlock leakage > > include/native/task.h|5 + > ksrc/skins/native/task.c | 13 ++--- > 2 files changed, 11 insertions(+), 7 deletions(-) > > --8<-- > > When a native task terminates without going through rt_task_delete, we > leaked the fastlock so far. Fix it by moving the release into the delete > hook. As the ppd is already invalid at that point, we have to save the > heap address in the task data structure. I am working on this ppd cleanup issue again, I am asking for help to find a fix in -head for all cases where the sys_ppd is needed during some cleanup. The problem is that when the ppd cleanup is invoked: - we have no guarantee that current is a thread from the Xenomai application; - if it is, current->mm is NULL. So, associating the sys_ppd to either current or current->mm does not work. What we could do is pass the sys_ppd to all the other ppds cleanup handlers, this would fix cases such as freeing mutexes fastlock, but that does not help when the sys_ppd is needed during a thread deletion hook. I would like to find a solution where simply calling xnsys_ppd_get() will work, where we do not have an xnsys_ppd_get for each context, such as for instance xnsys_ppd_get_by_mm/xnsys_ppd_get_by_task_struct, because it would be too error-prone. Any idea anyone? -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/26/2011 09:37 AM, Jan Kiszka wrote: > On 2011-05-26 09:29, Gilles Chanteperdrix wrote: >> On 05/26/2011 09:18 AM, Jan Kiszka wrote: >>> On 2011-05-25 20:48, Gilles Chanteperdrix wrote: On 05/25/2011 02:22 PM, Jan Kiszka wrote: > On 2011-05-25 14:19, Gilles Chanteperdrix wrote: >> On 05/25/2011 02:12 PM, Jan Kiszka wrote: >>> On 2011-05-25 13:58, Gilles Chanteperdrix wrote: On 05/25/2011 01:20 PM, Jan Kiszka wrote: > On 2011-05-24 16:03, Gilles Chanteperdrix wrote: >> On 05/24/2011 03:52 PM, Jan Kiszka wrote: >>> On 2011-05-24 14:30, Gilles Chanteperdrix wrote: > Do you already have an idea how to get that info to the > delete hook > function? Yes. We start by not applying the list reversal patch, then the sys_ppd is the first in the list. So, we can, in the function ppd_remove_mm, start by removing all the others ppd, then remove the sys ppd (that is the first), last. This changes a few signatures in the core code, a lot of things in the skin code, but that would be for the better... >>> >>> I still don't see how this affects the order we use in >>> do_taskexit_event, the one that prevents xnsys_get_ppd usage >>> even when >>> the mm is still present. >> >> The idea is to change the cleanup routines not to call >> xnsys_get_ppd. > > ...and use what instead? Sorry, I'm slow today. The sys_ppd passed as other argument to the cleanup function. >>> >>> That would affect all thread hooks, not only the one for deletion. >>> And >>> it would pull in more shadow-specific bits into the pod. >>> >>> Moreover, I think we would still be in troubles as mm, thus ppd, >>> deletion takes place before last task deletion, thus taskexit hook >>> invocation. That's due to the cleanup ordering in the kernel's >>> do_exit. >>> >>> However, if you have a patch, I'd be happy to test and rework my >>> leakage >>> fix. >> >> I will work on this ASAP. 
> > Sorry for pushing, but I need to decide if we should roll out my > imperfect fix or if there is chance to use some upstream version > directly. Were you able to look into this, or will this likely take a > bit more time? I intended to try and do this next week-end. If it is more urgent than that, I can try in one or two days. In any case, I do not think we should try and workaround the current code, it is way too fragile. >>> >>> Mmh, might be true. I'm getting the feeling we should locally revert all >>> the recent MPS changes to work around the issues. It looks like there >>> are more related problems sleeping (we are still facing spurious >>> fast-synch related crashes here - examining ATM). >> >> This is the development head, it may remain broken for short times while >> we are fixing. I would understand reverting on the 2.5 branch, not on >> -head. > > I was thinking loudly about our (Siemens) local branch, not -head. We > are forced to use head to have features like automatic non-RT shadow > migration. Now that I think about it, leaking a few bytes in the private heap is completely harmless, since it is destroyed anyway, >>> >>> Not at all harmless if you create and destroy tasks without restarting >>> the application. That's what our users do, so this bug is killing them. >> >> Still, it should not cause crashes. Only allocation returning NULL at >> some point. > > That might be true. Reverting some suspicious patches did not resolve > the problem so far. > >> We do not care, we only use the mm pointer value as a key, and this one is still available when the exit callback is called. So, we have the mm pointer, we can have the process private ppd, normally, we have all that we need. It is just a question of finding an interface which is not too intrusive. >>> >>> The problem is that the heap we need to release the fastlock to will >>> already be history at this point - classic use after release... 
>> >> As I said, xnheap_free is robust against this, so, this use after >> release does not cause any trouble. But I have also already agreed that >> we should fix it. > > xnheap_test_and_free is not at all robust against working on an invalid > heap object. The heap object is not really invalid, it has been scheduled to be freed, but has not been freed yet. But OK, let us stop discussing this, yes, this needs fixing. -- Gil
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-26 09:29, Gilles Chanteperdrix wrote: > On 05/26/2011 09:18 AM, Jan Kiszka wrote: >> On 2011-05-25 20:48, Gilles Chanteperdrix wrote: >>> On 05/25/2011 02:22 PM, Jan Kiszka wrote: On 2011-05-25 14:19, Gilles Chanteperdrix wrote: > On 05/25/2011 02:12 PM, Jan Kiszka wrote: >> On 2011-05-25 13:58, Gilles Chanteperdrix wrote: >>> On 05/25/2011 01:20 PM, Jan Kiszka wrote: On 2011-05-24 16:03, Gilles Chanteperdrix wrote: > On 05/24/2011 03:52 PM, Jan Kiszka wrote: >> On 2011-05-24 14:30, Gilles Chanteperdrix wrote: Do you already have an idea how to get that info to the delete hook function? >>> >>> Yes. We start by not applying the list reversal patch, then the >>> sys_ppd >>> is the first in the list. So, we can, in the function >>> ppd_remove_mm, >>> start by removing all the others ppd, then remove the sys ppd >>> (that is >>> the first), last. This changes a few signatures in the core >>> code, a lot >>> of things in the skin code, but that would be for the better... >> >> I still don't see how this affects the order we use in >> do_taskexit_event, the one that prevents xnsys_get_ppd usage >> even when >> the mm is still present. > > The idea is to change the cleanup routines not to call > xnsys_get_ppd. ...and use what instead? Sorry, I'm slow today. >>> >>> The sys_ppd passed as other argument to the cleanup function. >> >> That would affect all thread hooks, not only the one for deletion. >> And >> it would pull in more shadow-specific bits into the pod. >> >> Moreover, I think we would still be in troubles as mm, thus ppd, >> deletion takes place before last task deletion, thus taskexit hook >> invocation. That's due to the cleanup ordering in the kernel's >> do_exit. >> >> However, if you have a patch, I'd be happy to test and rework my >> leakage >> fix. > > I will work on this ASAP. Sorry for pushing, but I need to decide if we should role out my imperfect fix or if there is chance to use some upstream version directly. 
Were you able to look into this, or will this likely take a bit more time? >>> >>> I intended to try and do this next week-end. If it is more urgent than >>> that, I can try in one or two days. In any case, I do not think we >>> should try and workaround the current code, it is way to fragile. >> >> Mmh, might be true. I'm getting the feeling we should locally revert all >> the recent MPS changes to work around the issues. It looks like there >> are more related problems sleeping (we are still facing spurious >> fast-synch related crashes here - examining ATM). > > This is the development head, it may remain broken for short times while > we are fixing. I would understand reverting on the 2.5 branch, not on > -head. I was thinking loudly about our (Siemens) local branch, not -head. We are forced to use head to have features like automatic non-RT shadow migration. >>> >>> Now that I think about it, leaking a few bytes in the private heap is >>> completely harmless, since it is destroyed anyway, >> >> Not at all harmless if you create and destroy tasks without restarting >> the application. That's what our users are do, so this bug is killing them. > > Still, it should not cause crashes. Only allocation returning NULL at > some point. That might be true. Reverting some suspicious patches did not resolve the problem so far. > >>> We do not care, we only use the mm pointer value as a key, and this one >>> is still available when the exit callback is called. So, we have the mm >>> pointer, we can have the process private ppd, normally, we have all that >>> we need. It is just a question of finding an interface which is not too >>> intrusive. >> >> The problem is that the heap we need to release the fastlock to will >> already be history at this point - classic use after release... > > As I said, xnheap_free is robust against this, so, this user after > release does not cause any trouble. But I have also already agreed that > we should fix it. 
xnheap_test_and_free is not at all robust against working on an invalid heap object. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/26/2011 09:18 AM, Jan Kiszka wrote: > On 2011-05-25 20:48, Gilles Chanteperdrix wrote: >> On 05/25/2011 02:22 PM, Jan Kiszka wrote: >>> On 2011-05-25 14:19, Gilles Chanteperdrix wrote: On 05/25/2011 02:12 PM, Jan Kiszka wrote: > On 2011-05-25 13:58, Gilles Chanteperdrix wrote: >> On 05/25/2011 01:20 PM, Jan Kiszka wrote: >>> On 2011-05-24 16:03, Gilles Chanteperdrix wrote: On 05/24/2011 03:52 PM, Jan Kiszka wrote: > On 2011-05-24 14:30, Gilles Chanteperdrix wrote: >>> Do you already have an idea how to get that info to the delete >>> hook >>> function? >> >> Yes. We start by not applying the list reversal patch, then the >> sys_ppd >> is the first in the list. So, we can, in the function >> ppd_remove_mm, >> start by removing all the others ppd, then remove the sys ppd >> (that is >> the first), last. This changes a few signatures in the core >> code, a lot >> of things in the skin code, but that would be for the better... > > I still don't see how this affects the order we use in > do_taskexit_event, the one that prevents xnsys_get_ppd usage even > when > the mm is still present. The idea is to change the cleanup routines not to call xnsys_get_ppd. >>> >>> ...and use what instead? Sorry, I'm slow today. >> >> The sys_ppd passed as other argument to the cleanup function. > > That would affect all thread hooks, not only the one for deletion. And > it would pull in more shadow-specific bits into the pod. > > Moreover, I think we would still be in troubles as mm, thus ppd, > deletion takes place before last task deletion, thus taskexit hook > invocation. That's due to the cleanup ordering in the kernel's > do_exit. > > However, if you have a patch, I'd be happy to test and rework my > leakage > fix. I will work on this ASAP. >>> >>> Sorry for pushing, but I need to decide if we should role out my >>> imperfect fix or if there is chance to use some upstream version >>> directly. Were you able to look into this, or will this likely take a >>> bit more time? 
>> >> I intended to try and do this next week-end. If it is more urgent than >> that, I can try in one or two days. In any case, I do not think we >> should try and workaround the current code, it is way to fragile. > > Mmh, might be true. I'm getting the feeling we should locally revert all > the recent MPS changes to work around the issues. It looks like there > are more related problems sleeping (we are still facing spurious > fast-synch related crashes here - examining ATM). This is the development head, it may remain broken for short times while we are fixing. I would understand reverting on the 2.5 branch, not on -head. >>> >>> I was thinking loudly about our (Siemens) local branch, not -head. We >>> are forced to use head to have features like automatic non-RT shadow >>> migration. >> >> Now that I think about it, leaking a few bytes in the private heap is >> completely harmless, since it is destroyed anyway, > > Not at all harmless if you create and destroy tasks without restarting > the application. That's what our users are do, so this bug is killing them. Still, it should not cause crashes. Only allocation returning NULL at some point. >> We do not care, we only use the mm pointer value as a key, and this one >> is still available when the exit callback is called. So, we have the mm >> pointer, we can have the process private ppd, normally, we have all that >> we need. It is just a question of finding an interface which is not too >> intrusive. > > The problem is that the heap we need to release the fastlock to will > already be history at this point - classic use after release... As I said, xnheap_free is robust against this, so, this user after release does not cause any trouble. But I have also already agreed that we should fix it. -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-25 20:48, Gilles Chanteperdrix wrote: > On 05/25/2011 02:22 PM, Jan Kiszka wrote: >> On 2011-05-25 14:19, Gilles Chanteperdrix wrote: >>> On 05/25/2011 02:12 PM, Jan Kiszka wrote: On 2011-05-25 13:58, Gilles Chanteperdrix wrote: > On 05/25/2011 01:20 PM, Jan Kiszka wrote: >> On 2011-05-24 16:03, Gilles Chanteperdrix wrote: >>> On 05/24/2011 03:52 PM, Jan Kiszka wrote: On 2011-05-24 14:30, Gilles Chanteperdrix wrote: >> Do you already have an idea how to get that info to the delete >> hook >> function? > > Yes. We start by not applying the list reversal patch, then the > sys_ppd > is the first in the list. So, we can, in the function > ppd_remove_mm, > start by removing all the others ppd, then remove the sys ppd > (that is > the first), last. This changes a few signatures in the core code, > a lot > of things in the skin code, but that would be for the better... I still don't see how this affects the order we use in do_taskexit_event, the one that prevents xnsys_get_ppd usage even when the mm is still present. >>> >>> The idea is to change the cleanup routines not to call >>> xnsys_get_ppd. >> >> ...and use what instead? Sorry, I'm slow today. > > The sys_ppd passed as other argument to the cleanup function. That would affect all thread hooks, not only the one for deletion. And it would pull in more shadow-specific bits into the pod. Moreover, I think we would still be in troubles as mm, thus ppd, deletion takes place before last task deletion, thus taskexit hook invocation. That's due to the cleanup ordering in the kernel's do_exit. However, if you have a patch, I'd be happy to test and rework my leakage fix. >>> >>> I will work on this ASAP. >> >> Sorry for pushing, but I need to decide if we should role out my >> imperfect fix or if there is chance to use some upstream version >> directly. Were you able to look into this, or will this likely take a >> bit more time? > > I intended to try and do this next week-end. 
If it is more urgent than > that, I can try in one or two days. In any case, I do not think we > should try and workaround the current code, it is way to fragile. Mmh, might be true. I'm getting the feeling we should locally revert all the recent MPS changes to work around the issues. It looks like there are more related problems sleeping (we are still facing spurious fast-synch related crashes here - examining ATM). >>> >>> This is the development head, it may remain broken for short times while >>> we are fixing. I would understand reverting on the 2.5 branch, not on -head. >> >> I was thinking loudly about our (Siemens) local branch, not -head. We >> are forced to use head to have features like automatic non-RT shadow >> migration. > > Now that I think about it, leaking a few bytes in the private heap is > completely harmless, since it is destroyed anyway, Not at all harmless if you create and destroy tasks without restarting the application. That's what our users are do, so this bug is killing them. > and xnheap_free does > enough checks to avoid clobbering already freed memory, though > destroying this heap last is the way to go, as I have already said. > > A bit less in the global heap, but this one should be used less often, > and from your explanation, I understand this is not the case you are > interested in, otherwise you would not care if xnpod_userspace_p() is > working. > > In any case, it should not cause crashes, it is just a leak. > >> >>> Another thing that just came to my mind: Do we have a well-defined destruction order of native skin or native tasks vs. system skin? I mean the latter destroys the local sem_heap while the former may purge remaining native resources (including the MPS fastlock). I think the ordering is inverted to what the code assumes (heap is destructed before the last task), no? >>> >>> IMO, the system skin destroy callback should be called last, this should >>> solve these problems. This is what I was talking about. >> >> OK. 
Still, tasks aren't destroyed on mm shoot-down but via a dedicated >> do_exit callback that is invoked by the kernel a few instructions later. >> Nothing we can directly influence from Xenomai code. > We do not care, we only use the mm pointer value as a key, and this one > is still available when the exit callback is called. So, we have the mm > pointer, we can have the process private ppd, normally, we have all that > we need. It is just a question of finding an interface which is not too > intrusive. The problem
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/25/2011 02:22 PM, Jan Kiszka wrote: > On 2011-05-25 14:19, Gilles Chanteperdrix wrote: >> On 05/25/2011 02:12 PM, Jan Kiszka wrote: >>> On 2011-05-25 13:58, Gilles Chanteperdrix wrote: On 05/25/2011 01:20 PM, Jan Kiszka wrote: > On 2011-05-24 16:03, Gilles Chanteperdrix wrote: >> On 05/24/2011 03:52 PM, Jan Kiszka wrote: >>> On 2011-05-24 14:30, Gilles Chanteperdrix wrote: > Do you already have an idea how to get that info to the delete > hook > function? Yes. We start by not applying the list reversal patch, then the sys_ppd is the first in the list. So, we can, in the function ppd_remove_mm, start by removing all the others ppd, then remove the sys ppd (that is the first), last. This changes a few signatures in the core code, a lot of things in the skin code, but that would be for the better... >>> >>> I still don't see how this affects the order we use in >>> do_taskexit_event, the one that prevents xnsys_get_ppd usage even >>> when >>> the mm is still present. >> >> The idea is to change the cleanup routines not to call xnsys_get_ppd. > > ...and use what instead? Sorry, I'm slow today. The sys_ppd passed as other argument to the cleanup function. >>> >>> That would affect all thread hooks, not only the one for deletion. And >>> it would pull in more shadow-specific bits into the pod. >>> >>> Moreover, I think we would still be in troubles as mm, thus ppd, >>> deletion takes place before last task deletion, thus taskexit hook >>> invocation. That's due to the cleanup ordering in the kernel's do_exit. >>> >>> However, if you have a patch, I'd be happy to test and rework my leakage >>> fix. >> >> I will work on this ASAP. > > Sorry for pushing, but I need to decide if we should role out my > imperfect fix or if there is chance to use some upstream version > directly. Were you able to look into this, or will this likely take a > bit more time? I intended to try and do this next week-end. If it is more urgent than that, I can try in one or two days. 
In any case, I do not think we should try and workaround the current code, it is way to fragile. >>> >>> Mmh, might be true. I'm getting the feeling we should locally revert all >>> the recent MPS changes to work around the issues. It looks like there >>> are more related problems sleeping (we are still facing spurious >>> fast-synch related crashes here - examining ATM). >> >> This is the development head, it may remain broken for short times while >> we are fixing. I would understand reverting on the 2.5 branch, not on -head. > > I was thinking loudly about our (Siemens) local branch, not -head. We > are forced to use head to have features like automatic non-RT shadow > migration. Now that I think about it, leaking a few bytes in the private heap is completely harmless, since it is destroyed anyway, and xnheap_free does enough checks to avoid clobbering already freed memory, though destroying this heap last is the way to go, as I have already said. A bit less in the global heap, but this one should be used less often, and from your explanation, I understand this is not the case you are interested in, otherwise you would not care if xnpod_userspace_p() is working. In any case, it should not cause crashes, it is just a leak. > >> >>> Another thing that just came to my mind: Do we have a well-defined >>> destruction order of native skin or native tasks vs. system skin? I mean >>> the latter destroys the local sem_heap while the former may purge >>> remaining native resources (including the MPS fastlock). I think the >>> ordering is inverted to what the code assumes (heap is destructed before >>> the last task), no? >> >> IMO, the system skin destroy callback should be called last, this should >> solve these problems. This is what I was talking about. > > OK. Still, tasks aren't destroyed on mm shoot-down but via a dedicated > do_exit callback that is invoke by the kernel a few instructions later. > Nothing we can directly influence from Xenomai code. 
We do not care, we only use the mm pointer value as a key, and this one is still available when the exit callback is called. So, we have the mm pointer, we can have the process private ppd, normally, we have all that we need. It is just a question of finding an interface which is not too intrusive. -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-25 14:19, Gilles Chanteperdrix wrote: > On 05/25/2011 02:12 PM, Jan Kiszka wrote: >> On 2011-05-25 13:58, Gilles Chanteperdrix wrote: >>> On 05/25/2011 01:20 PM, Jan Kiszka wrote: On 2011-05-24 16:03, Gilles Chanteperdrix wrote: > On 05/24/2011 03:52 PM, Jan Kiszka wrote: >> On 2011-05-24 14:30, Gilles Chanteperdrix wrote: Do you already have an idea how to get that info to the delete hook function? >>> >>> Yes. We start by not applying the list reversal patch, then the >>> sys_ppd >>> is the first in the list. So, we can, in the function ppd_remove_mm, >>> start by removing all the others ppd, then remove the sys ppd (that >>> is >>> the first), last. This changes a few signatures in the core code, a >>> lot >>> of things in the skin code, but that would be for the better... >> >> I still don't see how this affects the order we use in >> do_taskexit_event, the one that prevents xnsys_get_ppd usage even >> when >> the mm is still present. > > The idea is to change the cleanup routines not to call xnsys_get_ppd. ...and use what instead? Sorry, I'm slow today. >>> >>> The sys_ppd passed as other argument to the cleanup function. >> >> That would affect all thread hooks, not only the one for deletion. And >> it would pull in more shadow-specific bits into the pod. >> >> Moreover, I think we would still be in troubles as mm, thus ppd, >> deletion takes place before last task deletion, thus taskexit hook >> invocation. That's due to the cleanup ordering in the kernel's do_exit. >> >> However, if you have a patch, I'd be happy to test and rework my leakage >> fix. > > I will work on this ASAP. Sorry for pushing, but I need to decide if we should role out my imperfect fix or if there is chance to use some upstream version directly. Were you able to look into this, or will this likely take a bit more time? >>> >>> I intended to try and do this next week-end. If it is more urgent than >>> that, I can try in one or two days. 
In any case, I do not think we >>> should try and workaround the current code, it is way to fragile. >> >> Mmh, might be true. I'm getting the feeling we should locally revert all >> the recent MPS changes to work around the issues. It looks like there >> are more related problems sleeping (we are still facing spurious >> fast-synch related crashes here - examining ATM). > > This is the development head, it may remain broken for short times while > we are fixing. I would understand reverting on the 2.5 branch, not on -head. I was thinking loudly about our (Siemens) local branch, not -head. We are forced to use head to have features like automatic non-RT shadow migration. > >> Another thing that just came to my mind: Do we have a well-defined >> destruction order of native skin or native tasks vs. system skin? I mean >> the latter destroys the local sem_heap while the former may purge >> remaining native resources (including the MPS fastlock). I think the >> ordering is inverted to what the code assumes (heap is destructed before >> the last task), no? > > IMO, the system skin destroy callback should be called last, this should > solve these problems. This is what I was talking about. OK. Still, tasks aren't destroyed on mm shoot-down but via a dedicated do_exit callback that is invoke by the kernel a few instructions later. Nothing we can directly influence from Xenomai code. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/25/2011 02:12 PM, Jan Kiszka wrote: > On 2011-05-25 13:58, Gilles Chanteperdrix wrote: >> On 05/25/2011 01:20 PM, Jan Kiszka wrote: >>> On 2011-05-24 16:03, Gilles Chanteperdrix wrote: On 05/24/2011 03:52 PM, Jan Kiszka wrote: > On 2011-05-24 14:30, Gilles Chanteperdrix wrote: >>> Do you already have an idea how to get that info to the delete hook >>> function? >> >> Yes. We start by not applying the list reversal patch, then the >> sys_ppd >> is the first in the list. So, we can, in the function ppd_remove_mm, >> start by removing all the others ppd, then remove the sys ppd (that >> is >> the first), last. This changes a few signatures in the core code, a >> lot >> of things in the skin code, but that would be for the better... > > I still don't see how this affects the order we use in > do_taskexit_event, the one that prevents xnsys_get_ppd usage even when > the mm is still present. The idea is to change the cleanup routines not to call xnsys_get_ppd. >>> >>> ...and use what instead? Sorry, I'm slow today. >> >> The sys_ppd passed as other argument to the cleanup function. > > That would affect all thread hooks, not only the one for deletion. And > it would pull in more shadow-specific bits into the pod. > > Moreover, I think we would still be in troubles as mm, thus ppd, > deletion takes place before last task deletion, thus taskexit hook > invocation. That's due to the cleanup ordering in the kernel's do_exit. > > However, if you have a patch, I'd be happy to test and rework my leakage > fix. I will work on this ASAP. >>> >>> Sorry for pushing, but I need to decide if we should role out my >>> imperfect fix or if there is chance to use some upstream version >>> directly. Were you able to look into this, or will this likely take a >>> bit more time? >> >> I intended to try and do this next week-end. If it is more urgent than >> that, I can try in one or two days. 
In any case, I do not think we >> should try and work around the current code, it is way too fragile. > > Mmh, might be true. I'm getting the feeling we should locally revert all > the recent MPS changes to work around the issues. It looks like there > are more related problems sleeping (we are still facing spurious > fast-synch related crashes here - examining ATM). This is the development head, it may remain broken for short times while we are fixing. I would understand reverting on the 2.5 branch, not on -head. > Another thing that just came to my mind: Do we have a well-defined > destruction order of native skin or native tasks vs. system skin? I mean > the latter destroys the local sem_heap while the former may purge > remaining native resources (including the MPS fastlock). I think the > ordering is inverted to what the code assumes (heap is destructed before > the last task), no? IMO, the system skin destroy callback should be called last, this should solve these problems. This is what I was talking about. -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-25 13:58, Gilles Chanteperdrix wrote: > On 05/25/2011 01:20 PM, Jan Kiszka wrote: >> On 2011-05-24 16:03, Gilles Chanteperdrix wrote: >>> On 05/24/2011 03:52 PM, Jan Kiszka wrote: On 2011-05-24 14:30, Gilles Chanteperdrix wrote: >> Do you already have an idea how to get that info to the delete hook >> function? > > Yes. We start by not applying the list reversal patch, then the > sys_ppd > is the first in the list. So, we can, in the function ppd_remove_mm, > start by removing all the others ppd, then remove the sys ppd (that is > the first), last. This changes a few signatures in the core code, a > lot > of things in the skin code, but that would be for the better... I still don't see how this affects the order we use in do_taskexit_event, the one that prevents xnsys_get_ppd usage even when the mm is still present. >>> >>> The idea is to change the cleanup routines not to call xnsys_get_ppd. >> >> ...and use what instead? Sorry, I'm slow today. > > The sys_ppd passed as other argument to the cleanup function. That would affect all thread hooks, not only the one for deletion. And it would pull in more shadow-specific bits into the pod. Moreover, I think we would still be in troubles as mm, thus ppd, deletion takes place before last task deletion, thus taskexit hook invocation. That's due to the cleanup ordering in the kernel's do_exit. However, if you have a patch, I'd be happy to test and rework my leakage fix. >>> >>> I will work on this ASAP. >> >> Sorry for pushing, but I need to decide if we should role out my >> imperfect fix or if there is chance to use some upstream version >> directly. Were you able to look into this, or will this likely take a >> bit more time? > > I intended to try and do this next week-end. If it is more urgent than > that, I can try in one or two days. In any case, I do not think we > should try and workaround the current code, it is way to fragile. Mmh, might be true. 
I'm getting the feeling we should locally revert all the recent MPS changes to work around the issues. It looks like there are more related problems sleeping (we are still facing spurious fast-synch related crashes here - examining ATM). Another thing that just came to my mind: Do we have a well-defined destruction order of native skin or native tasks vs. system skin? I mean the latter destroys the local sem_heap while the former may purge remaining native resources (including the MPS fastlock). I think the ordering is inverted to what the code assumes (heap is destructed before the last task), no? Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/25/2011 01:20 PM, Jan Kiszka wrote: > On 2011-05-24 16:03, Gilles Chanteperdrix wrote: >> On 05/24/2011 03:52 PM, Jan Kiszka wrote: >>> On 2011-05-24 14:30, Gilles Chanteperdrix wrote: > Do you already have an idea how to get that info to the delete hook > function? Yes. We start by not applying the list reversal patch, then the sys_ppd is the first in the list. So, we can, in the function ppd_remove_mm, start by removing all the others ppd, then remove the sys ppd (that is the first), last. This changes a few signatures in the core code, a lot of things in the skin code, but that would be for the better... >>> >>> I still don't see how this affects the order we use in >>> do_taskexit_event, the one that prevents xnsys_get_ppd usage even when >>> the mm is still present. >> >> The idea is to change the cleanup routines not to call xnsys_get_ppd. > > ...and use what instead? Sorry, I'm slow today. The sys_ppd passed as other argument to the cleanup function. >>> >>> That would affect all thread hooks, not only the one for deletion. And >>> it would pull in more shadow-specific bits into the pod. >>> >>> Moreover, I think we would still be in troubles as mm, thus ppd, >>> deletion takes place before last task deletion, thus taskexit hook >>> invocation. That's due to the cleanup ordering in the kernel's do_exit. >>> >>> However, if you have a patch, I'd be happy to test and rework my leakage >>> fix. >> >> I will work on this ASAP. > > Sorry for pushing, but I need to decide if we should role out my > imperfect fix or if there is chance to use some upstream version > directly. Were you able to look into this, or will this likely take a > bit more time? I intended to try and do this next week-end. If it is more urgent than that, I can try in one or two days. In any case, I do not think we should try and workaround the current code, it is way to fragile. -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-24 16:03, Gilles Chanteperdrix wrote: > On 05/24/2011 03:52 PM, Jan Kiszka wrote: >> On 2011-05-24 14:30, Gilles Chanteperdrix wrote: Do you already have an idea how to get that info to the delete hook function? >>> >>> Yes. We start by not applying the list reversal patch, then the sys_ppd >>> is the first in the list. So, we can, in the function ppd_remove_mm, >>> start by removing all the others ppd, then remove the sys ppd (that is >>> the first), last. This changes a few signatures in the core code, a lot >>> of things in the skin code, but that would be for the better... >> >> I still don't see how this affects the order we use in >> do_taskexit_event, the one that prevents xnsys_get_ppd usage even when >> the mm is still present. > > The idea is to change the cleanup routines not to call xnsys_get_ppd. ...and use what instead? Sorry, I'm slow today. >>> >>> The sys_ppd passed as other argument to the cleanup function. >> >> That would affect all thread hooks, not only the one for deletion. And >> it would pull in more shadow-specific bits into the pod. >> >> Moreover, I think we would still be in trouble as mm, thus ppd, >> deletion takes place before last task deletion, thus taskexit hook >> invocation. That's due to the cleanup ordering in the kernel's do_exit. >> >> However, if you have a patch, I'd be happy to test and rework my leakage >> fix. > > I will work on this ASAP. Sorry for pushing, but I need to decide if we should roll out my imperfect fix or if there is chance to use some upstream version directly. Were you able to look into this, or will this likely take a bit more time? Thanks, Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/24/2011 03:52 PM, Jan Kiszka wrote: > On 2011-05-24 14:30, Gilles Chanteperdrix wrote: >>> Do you already have an idea how to get that info to the delete hook >>> function? >> >> Yes. We start by not applying the list reversal patch, then the sys_ppd >> is the first in the list. So, we can, in the function ppd_remove_mm, >> start by removing all the others ppd, then remove the sys ppd (that is >> the first), last. This changes a few signatures in the core code, a lot >> of things in the skin code, but that would be for the better... > > I still don't see how this affects the order we use in > do_taskexit_event, the one that prevents xnsys_get_ppd usage even when > the mm is still present. The idea is to change the cleanup routines not to call xnsys_get_ppd. >>> >>> ...and use what instead? Sorry, I'm slow today. >> >> The sys_ppd passed as other argument to the cleanup function. > > That would affect all thread hooks, not only the one for deletion. And > it would pull in more shadow-specific bits into the pod. > > Moreover, I think we would still be in troubles as mm, thus ppd, > deletion takes place before last task deletion, thus taskexit hook > invocation. That's due to the cleanup ordering in the kernel's do_exit. > > However, if you have a patch, I'd be happy to test and rework my leakage > fix. I will work on this ASAP. -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-24 14:30, Gilles Chanteperdrix wrote: >> Do you already have an idea how to get that info to the delete hook >> function? > > Yes. We start by not applying the list reversal patch, then the sys_ppd > is the first in the list. So, we can, in the function ppd_remove_mm, > start by removing all the others ppd, then remove the sys ppd (that is > the first), last. This changes a few signatures in the core code, a lot > of things in the skin code, but that would be for the better... I still don't see how this affects the order we use in do_taskexit_event, the one that prevents xnsys_get_ppd usage even when the mm is still present. >>> >>> The idea is to change the cleanup routines not to call xnsys_get_ppd. >> >> ...and use what instead? Sorry, I'm slow today. > > The sys_ppd passed as other argument to the cleanup function. That would affect all thread hooks, not only the one for deletion. And it would pull in more shadow-specific bits into the pod. Moreover, I think we would still be in troubles as mm, thus ppd, deletion takes place before last task deletion, thus taskexit hook invocation. That's due to the cleanup ordering in the kernel's do_exit. However, if you have a patch, I'd be happy to test and rework my leakage fix. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/24/2011 02:23 PM, Jan Kiszka wrote: > On 2011-05-24 12:41, Gilles Chanteperdrix wrote: >> On 05/24/2011 12:36 PM, Jan Kiszka wrote: >>> On 2011-05-24 11:58, Gilles Chanteperdrix wrote: On 05/24/2011 11:36 AM, Jan Kiszka wrote: > On 2011-05-24 11:32, Gilles Chanteperdrix wrote: >> On 05/24/2011 11:13 AM, Jan Kiszka wrote: >>> On 2011-05-24 06:31, Gilles Chanteperdrix wrote: On 05/23/2011 03:53 PM, Jan Kiszka wrote: > The following changes since commit > aec30a2543afa18fa7832deee85e187b0faeb1f0: > > xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 > +0200) > > are available in the git repository at: > git://git.xenomai.org/xenomai-jki.git for-upstream > > Jan Kiszka (1): > native: Fix msendq fastlock leakage > > include/native/task.h|5 + > ksrc/skins/native/task.c | 13 ++--- > 2 files changed, 11 insertions(+), 7 deletions(-) > > --8<-- > > When a native task terminates without going through rt_task_delete, we > leaked the fastlock so far. Fix it by moving the release into the > delete > hook. As the ppd is already invalid at that point, we have to save the > heap address in the task data structure. I Jan, I once worked on a patch to reverse the ppd cleanup order, in order to fix bugs of this kind. Here it comes. I do not remember why I did not commit it, but I guess it was not working well. Could we restart working from this patch? 
From 038ecf08cd66b3112e0fe277d71d294b8eb83bcc Mon Sep 17 00:00:00 2001 From: Gilles Chanteperdrix Date: Sun, 29 Aug 2010 14:52:08 +0200 Subject: [PATCH] nucleus: reverse ppd cleanup order --- ksrc/nucleus/shadow.c | 11 ++- 1 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c index b2d4326..725ae43 100644 --- a/ksrc/nucleus/shadow.c +++ b/ksrc/nucleus/shadow.c @@ -556,7 +556,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, } while (holder && (ppd->key.mm < pkey->mm || - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid))); + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid))); if (ppd->key.mm == pkey->mm && ppd->key.muxid == pkey->muxid) { /* found it, return it. */ @@ -566,7 +566,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, /* not found, return successor for insertion. */ if (ppd->key.mm < pkey->mm || - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid)) + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid)) *pholder = holder ? link2ppd(holder) : NULL; else *pholder = ppd; @@ -589,10 +589,11 @@ static int ppd_insert(xnshadow_ppd_t * holder) } inith(&holder->link); - if (next) + if (next) { insertq(q, &next->link, &holder->link); - else + } else { appendq(q, &holder->link); + } xnlock_put_irqrestore(&nklock, s); return 0; @@ -640,7 +641,7 @@ static inline void ppd_remove_mm(struct mm_struct *mm, xnqueue_t *q; spl_t s; - key.muxid = 0; + key.muxid = ~0UL; key.mm = mm; xnlock_get_irqsave(&nklock, s); ppd_lookup_inner(&q, &ppd, &key); >>> >>> Unfortunately, that won't help. I think we are forced to clear >>> xnshadow_thrptd before calling into xnpod_delete_thread, but we would >>> need that for xnshadow_ppd_get (=>xnpod_userspace_p()). >> >> I remember that now. Even if it worked, when the cleanup handler is >> called, current->mm is NULL. We need to do this differently, the sys ppd >> should be treated differently and passed to the other ppds cleanup >> routines. 
> > Do you already have an idea how to get that info to the delete hook > function? Yes. We start by not applying the list reversal patch, then the sys_ppd is the first in the list. So, we can, in the function ppd_remove_mm, start by removing all the others ppd, then remove the sys ppd (that is the first), last. This changes a few signatures in the core code, a lot of things in the skin code, but that would be for the better...
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-24 12:41, Gilles Chanteperdrix wrote: > On 05/24/2011 12:36 PM, Jan Kiszka wrote: >> On 2011-05-24 11:58, Gilles Chanteperdrix wrote: >>> On 05/24/2011 11:36 AM, Jan Kiszka wrote: On 2011-05-24 11:32, Gilles Chanteperdrix wrote: > On 05/24/2011 11:13 AM, Jan Kiszka wrote: >> On 2011-05-24 06:31, Gilles Chanteperdrix wrote: >>> On 05/23/2011 03:53 PM, Jan Kiszka wrote: The following changes since commit aec30a2543afa18fa7832deee85e187b0faeb1f0: xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 +0200) are available in the git repository at: git://git.xenomai.org/xenomai-jki.git for-upstream Jan Kiszka (1): native: Fix msendq fastlock leakage include/native/task.h|5 + ksrc/skins/native/task.c | 13 ++--- 2 files changed, 11 insertions(+), 7 deletions(-) --8<-- When a native task terminates without going through rt_task_delete, we leaked the fastlock so far. Fix it by moving the release into the delete hook. As the ppd is already invalid at that point, we have to save the heap address in the task data structure. >>> >>> I Jan, I once worked on a patch to reverse the ppd cleanup order, in >>> order >>> to fix bugs of this kind. Here it comes. I do not remember why I did not >>> commit it, but I guess it was not working well. Could we restart working >>> from this patch? 
>>> >>> From 038ecf08cd66b3112e0fe277d71d294b8eb83bcc Mon Sep 17 00:00:00 2001 >>> From: Gilles Chanteperdrix >>> Date: Sun, 29 Aug 2010 14:52:08 +0200 >>> Subject: [PATCH] nucleus: reverse ppd cleanup order >>> >>> --- >>> ksrc/nucleus/shadow.c | 11 ++- >>> 1 files changed, 6 insertions(+), 5 deletions(-) >>> >>> diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c >>> index b2d4326..725ae43 100644 >>> --- a/ksrc/nucleus/shadow.c >>> +++ b/ksrc/nucleus/shadow.c >>> @@ -556,7 +556,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, >>> } >>> while (holder && >>>(ppd->key.mm < pkey->mm || >>> - (ppd->key.mm == pkey->mm && ppd->key.muxid < >>> pkey->muxid))); >>> + (ppd->key.mm == pkey->mm && ppd->key.muxid > >>> pkey->muxid))); >>> >>> if (ppd->key.mm == pkey->mm && ppd->key.muxid == pkey->muxid) { >>> /* found it, return it. */ >>> @@ -566,7 +566,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, >>> >>> /* not found, return successor for insertion. */ >>> if (ppd->key.mm < pkey->mm || >>> - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid)) >>> + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid)) >>> *pholder = holder ? link2ppd(holder) : NULL; >>> else >>> *pholder = ppd; >>> @@ -589,10 +589,11 @@ static int ppd_insert(xnshadow_ppd_t * holder) >>> } >>> >>> inith(&holder->link); >>> - if (next) >>> + if (next) { >>> insertq(q, &next->link, &holder->link); >>> - else >>> + } else { >>> appendq(q, &holder->link); >>> + } >>> xnlock_put_irqrestore(&nklock, s); >>> >>> return 0; >>> @@ -640,7 +641,7 @@ static inline void ppd_remove_mm(struct mm_struct >>> *mm, >>> xnqueue_t *q; >>> spl_t s; >>> >>> - key.muxid = 0; >>> + key.muxid = ~0UL; >>> key.mm = mm; >>> xnlock_get_irqsave(&nklock, s); >>> ppd_lookup_inner(&q, &ppd, &key); >> >> Unfortunately, that won't help. I think we are forced to clear >> xnshadow_thrptd before calling into xnpod_delete_thread, but we would >> need that for xnshadow_ppd_get (=>xnpod_userspace_p()). 
> > I remember that now. Even if it worked, when the cleanup handler is > called, current->mm is NULL. We need to do this differently, the sys ppd > should be treated differently and passed to the other ppds cleanup > routines. Do you already have an idea how to get that info to the delete hook function? >>> >>> Yes. We start by not applying the list reversal patch, then the sys_ppd >>> is the first in the list. So, we can, in the function ppd_remove_mm, >>> start by removing all the others ppd, then remove the sys ppd (that is >>> the first), last. This changes a few signatures in the core code, a lot >>> of things in the skin code, but that would be for the better... >> >> I still don't see how this affects the order we use in >> do_taskexit_event, the one that prevents xnsys_get_ppd usage even when the mm is still present.
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/24/2011 12:36 PM, Jan Kiszka wrote: > On 2011-05-24 11:58, Gilles Chanteperdrix wrote: >> On 05/24/2011 11:36 AM, Jan Kiszka wrote: >>> On 2011-05-24 11:32, Gilles Chanteperdrix wrote: On 05/24/2011 11:13 AM, Jan Kiszka wrote: > On 2011-05-24 06:31, Gilles Chanteperdrix wrote: >> On 05/23/2011 03:53 PM, Jan Kiszka wrote: >>> The following changes since commit >>> aec30a2543afa18fa7832deee85e187b0faeb1f0: >>> >>> xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 >>> +0200) >>> >>> are available in the git repository at: >>> git://git.xenomai.org/xenomai-jki.git for-upstream >>> >>> Jan Kiszka (1): >>> native: Fix msendq fastlock leakage >>> >>> include/native/task.h|5 + >>> ksrc/skins/native/task.c | 13 ++--- >>> 2 files changed, 11 insertions(+), 7 deletions(-) >>> >>> --8<-- >>> >>> When a native task terminates without going through rt_task_delete, we >>> leaked the fastlock so far. Fix it by moving the release into the delete >>> hook. As the ppd is already invalid at that point, we have to save the >>> heap address in the task data structure. >> >> I Jan, I once worked on a patch to reverse the ppd cleanup order, in >> order >> to fix bugs of this kind. Here it comes. I do not remember why I did not >> commit it, but I guess it was not working well. Could we restart working >> from this patch? 
>> >> From 038ecf08cd66b3112e0fe277d71d294b8eb83bcc Mon Sep 17 00:00:00 2001 >> From: Gilles Chanteperdrix >> Date: Sun, 29 Aug 2010 14:52:08 +0200 >> Subject: [PATCH] nucleus: reverse ppd cleanup order >> >> --- >> ksrc/nucleus/shadow.c | 11 ++- >> 1 files changed, 6 insertions(+), 5 deletions(-) >> >> diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c >> index b2d4326..725ae43 100644 >> --- a/ksrc/nucleus/shadow.c >> +++ b/ksrc/nucleus/shadow.c >> @@ -556,7 +556,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, >> } >> while (holder && >> (ppd->key.mm < pkey->mm || >> -(ppd->key.mm == pkey->mm && ppd->key.muxid < >> pkey->muxid))); >> +(ppd->key.mm == pkey->mm && ppd->key.muxid > >> pkey->muxid))); >> >> if (ppd->key.mm == pkey->mm && ppd->key.muxid == pkey->muxid) { >> /* found it, return it. */ >> @@ -566,7 +566,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, >> >> /* not found, return successor for insertion. */ >> if (ppd->key.mm < pkey->mm || >> -(ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid)) >> +(ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid)) >> *pholder = holder ? link2ppd(holder) : NULL; >> else >> *pholder = ppd; >> @@ -589,10 +589,11 @@ static int ppd_insert(xnshadow_ppd_t * holder) >> } >> >> inith(&holder->link); >> -if (next) >> +if (next) { >> insertq(q, &next->link, &holder->link); >> -else >> +} else { >> appendq(q, &holder->link); >> +} >> xnlock_put_irqrestore(&nklock, s); >> >> return 0; >> @@ -640,7 +641,7 @@ static inline void ppd_remove_mm(struct mm_struct >> *mm, >> xnqueue_t *q; >> spl_t s; >> >> -key.muxid = 0; >> +key.muxid = ~0UL; >> key.mm = mm; >> xnlock_get_irqsave(&nklock, s); >> ppd_lookup_inner(&q, &ppd, &key); > > Unfortunately, that won't help. I think we are forced to clear > xnshadow_thrptd before calling into xnpod_delete_thread, but we would > need that for xnshadow_ppd_get (=>xnpod_userspace_p()). I remember that now. 
Even if it worked, when the cleanup handler is called, current->mm is NULL. We need to do this differently, the sys ppd should be treated differently and passed to the other ppds cleanup routines. >>> >>> Do you already have an idea how to get that info to the delete hook >>> function? >> >> Yes. We start by not applying the list reversal patch, then the sys_ppd >> is the first in the list. So, we can, in the function ppd_remove_mm, >> start by removing all the others ppd, then remove the sys ppd (that is >> the first), last. This changes a few signatures in the core code, a lot >> of things in the skin code, but that would be for the better... > > I still don't see how this affects the order we use in > do_taskexit_event, the one that prevents xnsys_get_ppd usage even when > the mm is still present. The idea is to change the cleanup routines not to call xnsys_get_ppd. --
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-24 11:58, Gilles Chanteperdrix wrote: > On 05/24/2011 11:36 AM, Jan Kiszka wrote: >> On 2011-05-24 11:32, Gilles Chanteperdrix wrote: >>> On 05/24/2011 11:13 AM, Jan Kiszka wrote: On 2011-05-24 06:31, Gilles Chanteperdrix wrote: > On 05/23/2011 03:53 PM, Jan Kiszka wrote: >> The following changes since commit >> aec30a2543afa18fa7832deee85e187b0faeb1f0: >> >> xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 +0200) >> >> are available in the git repository at: >> git://git.xenomai.org/xenomai-jki.git for-upstream >> >> Jan Kiszka (1): >> native: Fix msendq fastlock leakage >> >> include/native/task.h|5 + >> ksrc/skins/native/task.c | 13 ++--- >> 2 files changed, 11 insertions(+), 7 deletions(-) >> >> --8<-- >> >> When a native task terminates without going through rt_task_delete, we >> leaked the fastlock so far. Fix it by moving the release into the delete >> hook. As the ppd is already invalid at that point, we have to save the >> heap address in the task data structure. > > I Jan, I once worked on a patch to reverse the ppd cleanup order, in order > to fix bugs of this kind. Here it comes. I do not remember why I did not > commit it, but I guess it was not working well. Could we restart working > from this patch? 
> > From 038ecf08cd66b3112e0fe277d71d294b8eb83bcc Mon Sep 17 00:00:00 2001 > From: Gilles Chanteperdrix > Date: Sun, 29 Aug 2010 14:52:08 +0200 > Subject: [PATCH] nucleus: reverse ppd cleanup order > > --- > ksrc/nucleus/shadow.c | 11 ++- > 1 files changed, 6 insertions(+), 5 deletions(-) > > diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c > index b2d4326..725ae43 100644 > --- a/ksrc/nucleus/shadow.c > +++ b/ksrc/nucleus/shadow.c > @@ -556,7 +556,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, > } > while (holder && > (ppd->key.mm < pkey->mm || > - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid))); > + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid))); > > if (ppd->key.mm == pkey->mm && ppd->key.muxid == pkey->muxid) { > /* found it, return it. */ > @@ -566,7 +566,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, > > /* not found, return successor for insertion. */ > if (ppd->key.mm < pkey->mm || > - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid)) > + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid)) > *pholder = holder ? link2ppd(holder) : NULL; > else > *pholder = ppd; > @@ -589,10 +589,11 @@ static int ppd_insert(xnshadow_ppd_t * holder) > } > > inith(&holder->link); > - if (next) > + if (next) { > insertq(q, &next->link, &holder->link); > - else > + } else { > appendq(q, &holder->link); > + } > xnlock_put_irqrestore(&nklock, s); > > return 0; > @@ -640,7 +641,7 @@ static inline void ppd_remove_mm(struct mm_struct *mm, > xnqueue_t *q; > spl_t s; > > - key.muxid = 0; > + key.muxid = ~0UL; > key.mm = mm; > xnlock_get_irqsave(&nklock, s); > ppd_lookup_inner(&q, &ppd, &key); Unfortunately, that won't help. I think we are forced to clear xnshadow_thrptd before calling into xnpod_delete_thread, but we would need that for xnshadow_ppd_get (=>xnpod_userspace_p()). >>> >>> I remember that now. Even if it worked, when the cleanup handler is >>> called, current->mm is NULL. 
We need to do this differently, the sys ppd >>> should be treated differently and passed to the other ppds cleanup routines. >> >> Do you already have an idea how to get that info to the delete hook >> function? > > Yes. We start by not applying the list reversal patch, then the sys_ppd > is the first in the list. So, we can, in the function ppd_remove_mm, > start by removing all the others ppd, then remove the sys ppd (that is > the first), last. This changes a few signatures in the core code, a lot > of things in the skin code, but that would be for the better... I still don't see how this affects the order we use in do_taskexit_event, the one that prevents xnsys_get_ppd usage even when the mm is still present. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/24/2011 11:36 AM, Jan Kiszka wrote: > On 2011-05-24 11:32, Gilles Chanteperdrix wrote: >> On 05/24/2011 11:13 AM, Jan Kiszka wrote: >>> On 2011-05-24 06:31, Gilles Chanteperdrix wrote: On 05/23/2011 03:53 PM, Jan Kiszka wrote: > The following changes since commit > aec30a2543afa18fa7832deee85e187b0faeb1f0: > > xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 +0200) > > are available in the git repository at: > git://git.xenomai.org/xenomai-jki.git for-upstream > > Jan Kiszka (1): > native: Fix msendq fastlock leakage > > include/native/task.h|5 + > ksrc/skins/native/task.c | 13 ++--- > 2 files changed, 11 insertions(+), 7 deletions(-) > > --8<-- > > When a native task terminates without going through rt_task_delete, we > leaked the fastlock so far. Fix it by moving the release into the delete > hook. As the ppd is already invalid at that point, we have to save the > heap address in the task data structure. I Jan, I once worked on a patch to reverse the ppd cleanup order, in order to fix bugs of this kind. Here it comes. I do not remember why I did not commit it, but I guess it was not working well. Could we restart working from this patch? From 038ecf08cd66b3112e0fe277d71d294b8eb83bcc Mon Sep 17 00:00:00 2001 From: Gilles Chanteperdrix Date: Sun, 29 Aug 2010 14:52:08 +0200 Subject: [PATCH] nucleus: reverse ppd cleanup order --- ksrc/nucleus/shadow.c | 11 ++- 1 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c index b2d4326..725ae43 100644 --- a/ksrc/nucleus/shadow.c +++ b/ksrc/nucleus/shadow.c @@ -556,7 +556,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, } while (holder && (ppd->key.mm < pkey->mm || - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid))); + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid))); if (ppd->key.mm == pkey->mm && ppd->key.muxid == pkey->muxid) { /* found it, return it. 
*/ @@ -566,7 +566,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, /* not found, return successor for insertion. */ if (ppd->key.mm < pkey->mm || - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid)) + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid)) *pholder = holder ? link2ppd(holder) : NULL; else *pholder = ppd; @@ -589,10 +589,11 @@ static int ppd_insert(xnshadow_ppd_t * holder) } inith(&holder->link); - if (next) + if (next) { insertq(q, &next->link, &holder->link); - else + } else { appendq(q, &holder->link); + } xnlock_put_irqrestore(&nklock, s); return 0; @@ -640,7 +641,7 @@ static inline void ppd_remove_mm(struct mm_struct *mm, xnqueue_t *q; spl_t s; - key.muxid = 0; + key.muxid = ~0UL; key.mm = mm; xnlock_get_irqsave(&nklock, s); ppd_lookup_inner(&q, &ppd, &key); >>> >>> Unfortunately, that won't help. I think we are forced to clear >>> xnshadow_thrptd before calling into xnpod_delete_thread, but we would >>> need that for xnshadow_ppd_get (=>xnpod_userspace_p()). >> >> I remember that now. Even if it worked, when the cleanup handler is >> called, current->mm is NULL. We need to do this differently, the sys ppd >> should be treated differently and passed to the other ppds cleanup routines. > > Do you already have an idea how to get that info to the delete hook > function? Yes. We start by not applying the list reversal patch, then the sys_ppd is the first in the list. So, we can, in the function ppd_remove_mm, start by removing all the others ppd, then remove the sys ppd (that is the first), last. This changes a few signatures in the core code, a lot of things in the skin code, but that would be for the better... -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-24 11:32, Gilles Chanteperdrix wrote: > On 05/24/2011 11:13 AM, Jan Kiszka wrote: >> On 2011-05-24 06:31, Gilles Chanteperdrix wrote: >>> On 05/23/2011 03:53 PM, Jan Kiszka wrote: The following changes since commit aec30a2543afa18fa7832deee85e187b0faeb1f0: xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 +0200) are available in the git repository at: git://git.xenomai.org/xenomai-jki.git for-upstream Jan Kiszka (1): native: Fix msendq fastlock leakage include/native/task.h|5 + ksrc/skins/native/task.c | 13 ++--- 2 files changed, 11 insertions(+), 7 deletions(-) --8<-- When a native task terminates without going through rt_task_delete, we leaked the fastlock so far. Fix it by moving the release into the delete hook. As the ppd is already invalid at that point, we have to save the heap address in the task data structure. >>> >>> I Jan, I once worked on a patch to reverse the ppd cleanup order, in order >>> to fix bugs of this kind. Here it comes. I do not remember why I did not >>> commit it, but I guess it was not working well. Could we restart working >>> from this patch? >>> >>> From 038ecf08cd66b3112e0fe277d71d294b8eb83bcc Mon Sep 17 00:00:00 2001 >>> From: Gilles Chanteperdrix >>> Date: Sun, 29 Aug 2010 14:52:08 +0200 >>> Subject: [PATCH] nucleus: reverse ppd cleanup order >>> >>> --- >>> ksrc/nucleus/shadow.c | 11 ++- >>> 1 files changed, 6 insertions(+), 5 deletions(-) >>> >>> diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c >>> index b2d4326..725ae43 100644 >>> --- a/ksrc/nucleus/shadow.c >>> +++ b/ksrc/nucleus/shadow.c >>> @@ -556,7 +556,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, >>> } >>> while (holder && >>>(ppd->key.mm < pkey->mm || >>> - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid))); >>> + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid))); >>> >>> if (ppd->key.mm == pkey->mm && ppd->key.muxid == pkey->muxid) { >>> /* found it, return it. 
*/ >>> @@ -566,7 +566,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, >>> >>> /* not found, return successor for insertion. */ >>> if (ppd->key.mm < pkey->mm || >>> - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid)) >>> + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid)) >>> *pholder = holder ? link2ppd(holder) : NULL; >>> else >>> *pholder = ppd; >>> @@ -589,10 +589,11 @@ static int ppd_insert(xnshadow_ppd_t * holder) >>> } >>> >>> inith(&holder->link); >>> - if (next) >>> + if (next) { >>> insertq(q, &next->link, &holder->link); >>> - else >>> + } else { >>> appendq(q, &holder->link); >>> + } >>> xnlock_put_irqrestore(&nklock, s); >>> >>> return 0; >>> @@ -640,7 +641,7 @@ static inline void ppd_remove_mm(struct mm_struct *mm, >>> xnqueue_t *q; >>> spl_t s; >>> >>> - key.muxid = 0; >>> + key.muxid = ~0UL; >>> key.mm = mm; >>> xnlock_get_irqsave(&nklock, s); >>> ppd_lookup_inner(&q, &ppd, &key); >> >> Unfortunately, that won't help. I think we are forced to clear >> xnshadow_thrptd before calling into xnpod_delete_thread, but we would >> need that for xnshadow_ppd_get (=>xnpod_userspace_p()). > > I remember that now. Even if it worked, when the cleanup handler is > called, current->mm is NULL. We need to do this differently, the sys ppd > should be treated differently and passed to the other ppds cleanup routines. Do you already have an idea how to get that info to the delete hook function? Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/24/2011 11:13 AM, Jan Kiszka wrote: > On 2011-05-24 06:31, Gilles Chanteperdrix wrote: >> On 05/23/2011 03:53 PM, Jan Kiszka wrote: >>> The following changes since commit aec30a2543afa18fa7832deee85e187b0faeb1f0: >>> >>> xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 +0200) >>> >>> are available in the git repository at: >>> git://git.xenomai.org/xenomai-jki.git for-upstream >>> >>> Jan Kiszka (1): >>> native: Fix msendq fastlock leakage >>> >>> include/native/task.h|5 + >>> ksrc/skins/native/task.c | 13 ++--- >>> 2 files changed, 11 insertions(+), 7 deletions(-) >>> >>> --8<-- >>> >>> When a native task terminates without going through rt_task_delete, we >>> leaked the fastlock so far. Fix it by moving the release into the delete >>> hook. As the ppd is already invalid at that point, we have to save the >>> heap address in the task data structure. >> >> I Jan, I once worked on a patch to reverse the ppd cleanup order, in order >> to fix bugs of this kind. Here it comes. I do not remember why I did not >> commit it, but I guess it was not working well. Could we restart working >> from this patch? >> >> From 038ecf08cd66b3112e0fe277d71d294b8eb83bcc Mon Sep 17 00:00:00 2001 >> From: Gilles Chanteperdrix >> Date: Sun, 29 Aug 2010 14:52:08 +0200 >> Subject: [PATCH] nucleus: reverse ppd cleanup order >> >> --- >> ksrc/nucleus/shadow.c | 11 ++- >> 1 files changed, 6 insertions(+), 5 deletions(-) >> >> diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c >> index b2d4326..725ae43 100644 >> --- a/ksrc/nucleus/shadow.c >> +++ b/ksrc/nucleus/shadow.c >> @@ -556,7 +556,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, >> } >> while (holder && >> (ppd->key.mm < pkey->mm || >> -(ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid))); >> +(ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid))); >> >> if (ppd->key.mm == pkey->mm && ppd->key.muxid == pkey->muxid) { >> /* found it, return it. 
*/ >> @@ -566,7 +566,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, >> >> /* not found, return successor for insertion. */ >> if (ppd->key.mm < pkey->mm || >> -(ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid)) >> +(ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid)) >> *pholder = holder ? link2ppd(holder) : NULL; >> else >> *pholder = ppd; >> @@ -589,10 +589,11 @@ static int ppd_insert(xnshadow_ppd_t * holder) >> } >> >> inith(&holder->link); >> -if (next) >> +if (next) { >> insertq(q, &next->link, &holder->link); >> -else >> +} else { >> appendq(q, &holder->link); >> +} >> xnlock_put_irqrestore(&nklock, s); >> >> return 0; >> @@ -640,7 +641,7 @@ static inline void ppd_remove_mm(struct mm_struct *mm, >> xnqueue_t *q; >> spl_t s; >> >> -key.muxid = 0; >> +key.muxid = ~0UL; >> key.mm = mm; >> xnlock_get_irqsave(&nklock, s); >> ppd_lookup_inner(&q, &ppd, &key); > > Unfortunately, that won't help. I think we are forced to clear > xnshadow_thrptd before calling into xnpod_delete_thread, but we would > need that for xnshadow_ppd_get (=>xnpod_userspace_p()). I remember that now. Even if it worked, when the cleanup handler is called, current->mm is NULL. We need to do this differently, the sys ppd should be treated differently and passed to the other ppds cleanup routines. -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 2011-05-24 06:31, Gilles Chanteperdrix wrote: > On 05/23/2011 03:53 PM, Jan Kiszka wrote: >> The following changes since commit aec30a2543afa18fa7832deee85e187b0faeb1f0: >> >> xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 +0200) >> >> are available in the git repository at: >> git://git.xenomai.org/xenomai-jki.git for-upstream >> >> Jan Kiszka (1): >> native: Fix msendq fastlock leakage >> >> include/native/task.h|5 + >> ksrc/skins/native/task.c | 13 ++--- >> 2 files changed, 11 insertions(+), 7 deletions(-) >> >> --8<-- >> >> When a native task terminates without going through rt_task_delete, we >> leaked the fastlock so far. Fix it by moving the release into the delete >> hook. As the ppd is already invalid at that point, we have to save the >> heap address in the task data structure. > > I Jan, I once worked on a patch to reverse the ppd cleanup order, in order > to fix bugs of this kind. Here it comes. I do not remember why I did not > commit it, but I guess it was not working well. Could we restart working > from this patch? > > From 038ecf08cd66b3112e0fe277d71d294b8eb83bcc Mon Sep 17 00:00:00 2001 > From: Gilles Chanteperdrix > Date: Sun, 29 Aug 2010 14:52:08 +0200 > Subject: [PATCH] nucleus: reverse ppd cleanup order > > --- > ksrc/nucleus/shadow.c | 11 ++- > 1 files changed, 6 insertions(+), 5 deletions(-) > > diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c > index b2d4326..725ae43 100644 > --- a/ksrc/nucleus/shadow.c > +++ b/ksrc/nucleus/shadow.c > @@ -556,7 +556,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, > } > while (holder && > (ppd->key.mm < pkey->mm || > - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid))); > + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid))); > > if (ppd->key.mm == pkey->mm && ppd->key.muxid == pkey->muxid) { > /* found it, return it. */ > @@ -566,7 +566,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, > > /* not found, return successor for insertion. 
*/ > if (ppd->key.mm < pkey->mm || > - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid)) > + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid)) > *pholder = holder ? link2ppd(holder) : NULL; > else > *pholder = ppd; > @@ -589,10 +589,11 @@ static int ppd_insert(xnshadow_ppd_t * holder) > } > > inith(&holder->link); > - if (next) > + if (next) { > insertq(q, &next->link, &holder->link); > - else > + } else { > appendq(q, &holder->link); > + } > xnlock_put_irqrestore(&nklock, s); > > return 0; > @@ -640,7 +641,7 @@ static inline void ppd_remove_mm(struct mm_struct *mm, > xnqueue_t *q; > spl_t s; > > - key.muxid = 0; > + key.muxid = ~0UL; > key.mm = mm; > xnlock_get_irqsave(&nklock, s); > ppd_lookup_inner(&q, &ppd, &key); Unfortunately, that won't help. I think we are forced to clear xnshadow_thrptd before calling into xnpod_delete_thread, but we would need that for xnshadow_ppd_get (=>xnpod_userspace_p()). Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
Re: [Xenomai-core] [PULL] native: Fix msendq fastlock leakage
On 05/23/2011 03:53 PM, Jan Kiszka wrote: > The following changes since commit aec30a2543afa18fa7832deee85e187b0faeb1f0: > > xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 +0200) > > are available in the git repository at: > git://git.xenomai.org/xenomai-jki.git for-upstream > > Jan Kiszka (1): > native: Fix msendq fastlock leakage > > include/native/task.h|5 + > ksrc/skins/native/task.c | 13 ++--- > 2 files changed, 11 insertions(+), 7 deletions(-) > > --8<-- > > When a native task terminates without going through rt_task_delete, we > leaked the fastlock so far. Fix it by moving the release into the delete > hook. As the ppd is already invalid at that point, we have to save the > heap address in the task data structure. I Jan, I once worked on a patch to reverse the ppd cleanup order, in order to fix bugs of this kind. Here it comes. I do not remember why I did not commit it, but I guess it was not working well. Could we restart working from this patch? >From 038ecf08cd66b3112e0fe277d71d294b8eb83bcc Mon Sep 17 00:00:00 2001 From: Gilles Chanteperdrix Date: Sun, 29 Aug 2010 14:52:08 +0200 Subject: [PATCH] nucleus: reverse ppd cleanup order --- ksrc/nucleus/shadow.c | 11 ++- 1 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c index b2d4326..725ae43 100644 --- a/ksrc/nucleus/shadow.c +++ b/ksrc/nucleus/shadow.c @@ -556,7 +556,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, } while (holder && (ppd->key.mm < pkey->mm || - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid))); + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid))); if (ppd->key.mm == pkey->mm && ppd->key.muxid == pkey->muxid) { /* found it, return it. */ @@ -566,7 +566,7 @@ static unsigned ppd_lookup_inner(xnqueue_t **pq, /* not found, return successor for insertion. 
*/ if (ppd->key.mm < pkey->mm || - (ppd->key.mm == pkey->mm && ppd->key.muxid < pkey->muxid)) + (ppd->key.mm == pkey->mm && ppd->key.muxid > pkey->muxid)) *pholder = holder ? link2ppd(holder) : NULL; else *pholder = ppd; @@ -589,10 +589,11 @@ static int ppd_insert(xnshadow_ppd_t * holder) } inith(&holder->link); - if (next) + if (next) { insertq(q, &next->link, &holder->link); - else + } else { appendq(q, &holder->link); + } xnlock_put_irqrestore(&nklock, s); return 0; @@ -640,7 +641,7 @@ static inline void ppd_remove_mm(struct mm_struct *mm, xnqueue_t *q; spl_t s; - key.muxid = 0; + key.muxid = ~0UL; key.mm = mm; xnlock_get_irqsave(&nklock, s); ppd_lookup_inner(&q, &ppd, &key); -- 1.7.2.5 -- Gilles. ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core
[Xenomai-core] [PULL] native: Fix msendq fastlock leakage
The following changes since commit aec30a2543afa18fa7832deee85e187b0faeb1f0: xeno-test: fix reference to @XENO_TEST_DIR@ (2011-05-15 21:20:41 +0200) are available in the git repository at: git://git.xenomai.org/xenomai-jki.git for-upstream Jan Kiszka (1): native: Fix msendq fastlock leakage include/native/task.h|5 + ksrc/skins/native/task.c | 13 ++--- 2 files changed, 11 insertions(+), 7 deletions(-) --8<-- When a native task terminates without going through rt_task_delete, we leaked the fastlock so far. Fix it by moving the release into the delete hook. As the ppd is already invalid at that point, we have to save the heap address in the task data structure. Signed-off-by: Jan Kiszka --- include/native/task.h|5 + ksrc/skins/native/task.c | 13 ++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/native/task.h b/include/native/task.h index 519aaec..0d44ccf 100644 --- a/include/native/task.h +++ b/include/native/task.h @@ -22,6 +22,7 @@ #ifndef _XENO_TASK_H #define _XENO_TASK_H +#include #include #include @@ -166,6 +167,10 @@ typedef struct rt_task { xnsynch_t mrecv, msendq; +#ifdef CONFIG_XENO_FASTSYNCH + xnheap_t *msendq_fastlock_heap; +#endif /* CONFIG_XENO_FASTSYNCH */ + int flowgen; /* !< Flow id. generator. */ #endif /* CONFIG_XENO_OPT_NATIVE_MPS */ diff --git a/ksrc/skins/native/task.c b/ksrc/skins/native/task.c index 1192509..6970363 100644 --- a/ksrc/skins/native/task.c +++ b/ksrc/skins/native/task.c @@ -79,6 +79,9 @@ static void __task_delete_hook(xnthread_t *thread) hooks are done. 
*/ xnsynch_destroy(&task->mrecv); xnsynch_destroy(&task->msendq); +#ifdef CONFIG_XENO_FASTSYNCH + xnheap_free(task->msendq_fastlock_heap, task->msendq.fastlock); +#endif /* CONFIG_XENO_FASTSYNCH */ #endif /* CONFIG_XENO_OPT_NATIVE_MPS */ xnsynch_destroy(&task->safesynch); @@ -301,7 +304,8 @@ int rt_task_create(RT_TASK *task, #ifdef CONFIG_XENO_OPT_NATIVE_MPS #ifdef CONFIG_XENO_FASTSYNCH /* Allocate lock memory for in-kernel use */ - fastlock = xnheap_alloc(&xnsys_ppd_get(0)->sem_heap, + task->msendq_fastlock_heap = &xnsys_ppd_get(0)->sem_heap; + fastlock = xnheap_alloc(task->msendq_fastlock_heap, sizeof(*fastlock)); if (fastlock == NULL) return -ENOMEM; @@ -328,7 +332,7 @@ int rt_task_create(RT_TASK *task, err = xnthread_register(&task->thread_base, name ? task->rname : ""); if (err) { #if defined(CONFIG_XENO_OPT_NATIVE_MPS) && defined(CONFIG_XENO_FASTSYNCH) - xnheap_free(&xnsys_ppd_get(0)->sem_heap, fastlock); + xnheap_free(task->msendq_fastlock_heap, fastlock); #endif xnpod_delete_thread(&task->thread_base); } else @@ -640,11 +644,6 @@ int rt_task_delete(RT_TASK *task) if (err) goto unlock_and_exit; -#if defined(CONFIG_XENO_OPT_NATIVE_MPS) && defined(CONFIG_XENO_FASTSYNCH) - xnheap_free(&xnsys_ppd_get(0)->sem_heap, - task->msendq.fastlock); -#endif - /* Does not return if task is current. */ xnpod_delete_thread(&task->thread_base); -- 1.7.1 ___ Xenomai-core mailing list Xenomai-core@gna.org https://mail.gna.org/listinfo/xenomai-core