Re: [PATCH] ilp32: fix {GET,SET}SIGMASK request for ptrace

2016-06-26 Thread Yury Norov
Hi Zhou,

Thank you for the patch. The idea is ok, but patch format got broken
for some reason. Could you re-send it?

Yury.

On Mon, Jun 27, 2016 at 12:49:05PM +0800, zhouchengming wrote:
> atus: RO
> Content-Length: 4732
> Lines: 181
> 
> The function compat_ptrace_request(used by ilp32) don't handle
> {GET,SET}SIGMASK request, so it will be handled by ptrace_request.
> But it's wrong because the compat_sigset_t of ilp32 differs from
> the sigset_t of aarch64. The patch fixes it.
> 
> Signed-off-by: Zhou Chengming 
> ---
>  arch/arm64/include/asm/signal_ilp32.h |   22 
>  arch/arm64/kernel/ptrace.c|   62
> +

Here -  unneeded line break

>  arch/arm64/kernel/signal_ilp32.c  |   23 +
>  3 files changed, 85 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/signal_ilp32.h
> b/arch/arm64/include/asm/signal_ilp32.h

and here

> index 30eff23..ed52607 100644
> --- a/arch/arm64/include/asm/signal_ilp32.h
> +++ b/arch/arm64/include/asm/signal_ilp32.h
> @@ -21,6 +21,28 @@
>  int ilp32_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
> struct pt_regs *regs);
> 
> +static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
> +{
> + compat_sigset_t cset;
> +
> + cset.sig[0] = set->sig[0] & 0xull;
> + cset.sig[1] = set->sig[0] >> 32;
> +
> + return copy_to_user(uset, , sizeof(*uset));
> +}
> +
> +static inline int get_sigset_t(sigset_t *set,
> +const compat_sigset_t __user *uset)
> +{
> + compat_sigset_t s32;
> +
> + if (copy_from_user(, uset, sizeof(*uset)))
> + return -EFAULT;
> +
> + set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
> + return 0;
> +}
> +
>  #else
> 
>  static inline int ilp32_setup_rt_frame(int usig, struct ksignal *ksig,
> sigset_t *set,

and here

> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index a861105..8d4cad1 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -44,6 +44,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #define CREATE_TRACE_POINTS
>  #include 
> @@ -1231,12 +1232,73 @@ COMPAT_SYSCALL_DEFINE4(aarch32_ptrace,
> compat_long_t, request, compat_long_t, pi

and later on the patch

> 
>  #endif /* CONFIG_AARCH32_EL0 */
> 
> +#ifdef CONFIG_ARM64_ILP32
> +
> +static int compat_ilp32_ptrace(struct task_struct *child, compat_long_t
> request,
> + compat_ulong_t addr, compat_ulong_t data)
> +{
> + compat_ulong_t __user *datap = compat_ptr(data);
> + int ret;
> +
> + switch (request) {
> + case PTRACE_GETSIGMASK:
> + if (addr != sizeof(compat_sigset_t)) {
> + ret = -EINVAL;
> + break;
> + }
> +
> + if (put_sigset_t((compat_sigset_t __user *)datap, 
> >blocked))
> + ret = -EFAULT;
> + else
> + ret = 0;
> + break;
> +
> + case PTRACE_SETSIGMASK: {
> + sigset_t new_set;
> + if (addr != sizeof(compat_sigset_t)) {
> + ret = -EINVAL;
> + break;
> + }
> +
> + if (get_sigset_t(_set, (compat_sigset_t __user *)datap)) {
> + ret = -EFAULT;
> + break;
> + }
> +
> + sigdelsetmask(_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
> +
> + /*
> +  * Every thread does recalc_sigpending() after resume, so
> +  * retarget_shared_pending() and recalc_sigpending() are not
> +  * called here.
> +  */
> + spin_lock_irq(>sighand->siglock);
> + child->blocked = new_set;
> + spin_unlock_irq(>sighand->siglock);
> +
> + ret = 0;
> + break;
> + }
> +
> + default:
> + ret = compat_ptrace_request(child, request, addr, data);
> + }
> +
> + return ret;
> +}
> +
> +#endif /* CONFIG_ARM64_ILP32 */
> +
>  #ifdef CONFIG_COMPAT
> 
>  long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
>   compat_ulong_t caddr, compat_ulong_t cdata)
>  {
> +#ifdef CONFIG_ARM64_ILP32
> + return compat_ilp32_ptrace(child, request, caddr, cdata);
> +#else
>   return compat_ptrace_request(child, request, caddr, cdata);
> +#endif
>  }
> 
>  #endif /* CONFIG_COMPAT */
> diff --git a/arch/arm64/kernel/signal_ilp32.c
> b/arch/arm64/kernel/signal_ilp32.c
> index 8ca64b9..3ef2749 100644
> --- a/arch/arm64/kernel/signal_ilp32.c
> +++ b/arch/arm64/kernel/signal_ilp32.c
> @@ -28,6 +28,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -58,28 +59,6 @@ struct ilp32_rt_sigframe {
>   struct ilp32_sigframe sig;
>  };
> 
> -static inline int 

[PATCH] ilp32: fix {GET,SET}SIGMASK request for ptrace

2016-06-26 Thread zhouchengming

The function compat_ptrace_request(used by ilp32) don't handle
{GET,SET}SIGMASK request, so it will be handled by ptrace_request.
But it's wrong because the compat_sigset_t of ilp32 differs from
the sigset_t of aarch64. The patch fixes it.

Signed-off-by: Zhou Chengming 
---
 arch/arm64/include/asm/signal_ilp32.h |   22 
 arch/arm64/kernel/ptrace.c|   62 
+

 arch/arm64/kernel/signal_ilp32.c  |   23 +
 3 files changed, 85 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/include/asm/signal_ilp32.h 
b/arch/arm64/include/asm/signal_ilp32.h

index 30eff23..ed52607 100644
--- a/arch/arm64/include/asm/signal_ilp32.h
+++ b/arch/arm64/include/asm/signal_ilp32.h
@@ -21,6 +21,28 @@
 int ilp32_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
  struct pt_regs *regs);

+static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
+{
+   compat_sigset_t cset;
+
+   cset.sig[0] = set->sig[0] & 0xull;
+   cset.sig[1] = set->sig[0] >> 32;
+
+   return copy_to_user(uset, , sizeof(*uset));
+}
+
+static inline int get_sigset_t(sigset_t *set,
+  const compat_sigset_t __user *uset)
+{
+   compat_sigset_t s32;
+
+   if (copy_from_user(, uset, sizeof(*uset)))
+   return -EFAULT;
+
+   set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
+   return 0;
+}
+
 #else

 static inline int ilp32_setup_rt_frame(int usig, struct ksignal *ksig, 
sigset_t *set,

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index a861105..8d4cad1 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -44,6 +44,7 @@
 #include 
 #include 
 #include 
+#include 

 #define CREATE_TRACE_POINTS
 #include 
@@ -1231,12 +1232,73 @@ COMPAT_SYSCALL_DEFINE4(aarch32_ptrace, 
compat_long_t, request, compat_long_t, pi


 #endif /* CONFIG_AARCH32_EL0 */

+#ifdef CONFIG_ARM64_ILP32
+
+static int compat_ilp32_ptrace(struct task_struct *child, compat_long_t 
request,

+   compat_ulong_t addr, compat_ulong_t data)
+{
+   compat_ulong_t __user *datap = compat_ptr(data);
+   int ret;
+
+   switch (request) {
+   case PTRACE_GETSIGMASK:
+   if (addr != sizeof(compat_sigset_t)) {
+   ret = -EINVAL;
+   break;
+   }
+
+   if (put_sigset_t((compat_sigset_t __user *)datap, 
>blocked))
+   ret = -EFAULT;
+   else
+   ret = 0;
+   break;
+
+   case PTRACE_SETSIGMASK: {
+   sigset_t new_set;
+   if (addr != sizeof(compat_sigset_t)) {
+   ret = -EINVAL;
+   break;
+   }
+
+   if (get_sigset_t(_set, (compat_sigset_t __user *)datap)) {
+   ret = -EFAULT;
+   break;
+   }
+
+   sigdelsetmask(_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
+
+   /*
+* Every thread does recalc_sigpending() after resume, so
+* retarget_shared_pending() and recalc_sigpending() are not
+* called here.
+*/
+   spin_lock_irq(>sighand->siglock);
+   child->blocked = new_set;
+   spin_unlock_irq(>sighand->siglock);
+
+   ret = 0;
+   break;
+   }
+
+   default:
+   ret = compat_ptrace_request(child, request, addr, data);
+   }
+
+   return ret;
+}
+
+#endif /* CONFIG_ARM64_ILP32 */
+
 #ifdef CONFIG_COMPAT

 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
 {
+#ifdef CONFIG_ARM64_ILP32
+   return compat_ilp32_ptrace(child, request, caddr, cdata);
+#else
return compat_ptrace_request(child, request, caddr, cdata);
+#endif
 }

 #endif /* CONFIG_COMPAT */
diff --git a/arch/arm64/kernel/signal_ilp32.c 
b/arch/arm64/kernel/signal_ilp32.c

index 8ca64b9..3ef2749 100644
--- a/arch/arm64/kernel/signal_ilp32.c
+++ b/arch/arm64/kernel/signal_ilp32.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -58,28 +59,6 @@ struct ilp32_rt_sigframe {
struct ilp32_sigframe sig;
 };

-static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
-{
-   compat_sigset_t cset;
-
-   cset.sig[0] = set->sig[0] & 0xull;
-   cset.sig[1] = set->sig[0] >> 32;
-
-   return copy_to_user(uset, , sizeof(*uset));
-}
-
-static inline int get_sigset_t(sigset_t *set,
-   const compat_sigset_t __user *uset)
-{
-   compat_sigset_t s32;
-
-   if (copy_from_user(, uset, sizeof(*uset)))
-   return -EFAULT;
-
-   set->sig[0] = s32.sig[0] | 

Re: [PATCH 12/19] arm64: ptrace: handle ptrace_request differently for aarch32 and ilp32

2016-06-26 Thread zhouchengming

The {GET,SET}SIGMASK request of ptrace on ilp32 is wrong, it's handled
by ptrace_request(like aarch64). So I write a patch to fix it(just for
ilp32). I will send the patch next.

Thanks!

On 2016/6/18 7:54, Yury Norov wrote:

Here new aarch32 ptrace syscall handler is introsuced to avoid run-time
detection of the task type.

Signed-off-by: Yury Norov
---
  arch/arm64/include/asm/unistd32.h |  2 +-
  arch/arm64/kernel/ptrace.c| 50 ++-
  arch/arm64/kernel/sys32.c |  1 +
  include/linux/ptrace.h|  6 +
  kernel/ptrace.c   | 10 
  5 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index b7e8ef1..6da7cbd 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -74,7 +74,7 @@ __SYSCALL(__NR_getuid, sys_getuid16)
/* 25 was sys_stime */
  __SYSCALL(25, sys_ni_syscall)
  #define __NR_ptrace 26
-__SYSCALL(__NR_ptrace, compat_sys_ptrace)
+__SYSCALL(__NR_ptrace, compat_sys_aarch32_ptrace)
/* 27 was sys_alarm */
  __SYSCALL(27, sys_ni_syscall)
/* 28 was sys_fstat */
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 38a09338..a861105 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -29,6 +29,7 @@
  #include
  #include
  #include
+#include
  #include
  #include
  #include
@@ -1114,7 +1115,7 @@ static int compat_ptrace_sethbpregs(struct task_struct 
*tsk, compat_long_t num,
  }
  #endif/* CONFIG_HAVE_HW_BREAKPOINT */

-long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+static long compat_a32_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
  {
unsigned long addr = caddr;
@@ -1191,8 +1192,55 @@ long compat_arch_ptrace(struct task_struct *child, 
compat_long_t request,

return ret;
  }
+
+COMPAT_SYSCALL_DEFINE4(aarch32_ptrace, compat_long_t, request, compat_long_t, 
pid,
+  compat_long_t, addr, compat_long_t, data)
+{
+   struct task_struct *child;
+   long ret;
+
+   if (request == PTRACE_TRACEME) {
+   ret = ptrace_traceme();
+   goto out;
+   }
+
+   child = ptrace_get_task_struct(pid);
+   if (IS_ERR(child)) {
+   ret = PTR_ERR(child);
+   goto out;
+   }
+
+   if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
+   ret = ptrace_attach(child, request, addr, data);
+   goto out_put_task_struct;
+   }
+
+   ret = ptrace_check_attach(child, request == PTRACE_KILL ||
+ request == PTRACE_INTERRUPT);
+   if (!ret) {
+   ret = compat_a32_ptrace(child, request, addr, data);
+   if (ret || request != PTRACE_DETACH)
+   ptrace_unfreeze_traced(child);
+   }
+
+ out_put_task_struct:
+   put_task_struct(child);
+ out:
+   return ret;
+}
+
  #endif /* CONFIG_AARCH32_EL0 */

+#ifdef CONFIG_COMPAT
+
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+   compat_ulong_t caddr, compat_ulong_t cdata)
+{
+   return compat_ptrace_request(child, request, caddr, cdata);
+}
+
+#endif /* CONFIG_COMPAT */
+
  const struct user_regset_view *task_user_regset_view(struct task_struct *task)
  {
  #ifdef CONFIG_AARCH32_EL0
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index a40b134..3752443 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -38,6 +38,7 @@ asmlinkage long compat_sys_fadvise64_64_wrapper(void);
  asmlinkage long compat_sys_sync_file_range2_wrapper(void);
  asmlinkage long compat_sys_fallocate_wrapper(void);
  asmlinkage long compat_sys_mmap2_wrapper(void);
+asmlinkage long compat_sys_aarch32_ptrace(void);

  #undef __SYSCALL
  #define __SYSCALL(nr, sym)[nr] = sym,
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 504c98a..75887a0 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -97,6 +97,12 @@ int generic_ptrace_peekdata(struct task_struct *tsk, 
unsigned long addr,
unsigned long data);
  int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
unsigned long data);
+int ptrace_traceme(void);
+struct task_struct *ptrace_get_task_struct(pid_t pid);
+int ptrace_attach(struct task_struct *task, long request,
+unsigned long addr, unsigned long flags);
+int ptrace_check_attach(struct task_struct *child, bool ignore_state);
+void ptrace_unfreeze_traced(struct task_struct *task);

  /**
   * ptrace_parent - return the task that is tracing the given task
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 

Re: [PATCH 17/23] arm64: ptrace: handle ptrace_request differently for aarch32 and ilp32

2016-06-26 Thread zhouchengming

On 2016/6/25 22:15, Bamvor Zhang wrote:

Hi, Chengming

On Sat, Jun 25, 2016 at 5:36 PM, zhouchengming
  wrote:

On 2016/6/9 1:00, Yury Norov wrote:


On Wed, Jun 08, 2016 at 09:34:09AM +0800, zhouchengming wrote:


On 2016/5/24 8:04, Yury Norov wrote:


Here new aarch32 ptrace syscall handler is introsuced to avoid run-time
detection of the task type.

Signed-off-by: Yury Norov



[...]


Hello, I found ilp32 will use sys_ptrace, not compat_sys_ptrace. So I
write
a little patch to see if can solve the problem correctly.

Thanks.

  From f6156236df578bb05c4a17e7f9776ceaf8f7afe6 Mon Sep 17 00:00:00 2001
From: Zhou Chengming
Date: Wed, 8 Jun 2016 09:46:23 +0800
Subject: [PATCH] ilp32: use compat_sys_ptrace instead of sys_ptrace

When we analyze a testcase of ptrace that failed on ilp32, we found
the syscall that the ilp32 uses is sys_ptrace, not compat_sys_ptrace.
Because in include/uapi/asm-generic/unistd.h it's defined like:
__SYSCALL(__NR_ptrace, sys_ptrace)
So we change it to __SC_COMP(__NR_ptrace, sys_ptrace, compat_sys_ptrace),
let compat tasks use the compat_sys_ptrace.

Signed-off-by: Zhou Chengming
---
   include/uapi/asm-generic/unistd.h |2 +-
   1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/uapi/asm-generic/unistd.h
b/include/uapi/asm-generic/unistd.h
index 2862d2e..50ee770 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -364,7 +364,7 @@ __SC_WRAP(__NR_syslog, sys_syslog)

   /* kernel/ptrace.c */
   #define __NR_ptrace 117
-__SYSCALL(__NR_ptrace, sys_ptrace)
+__SC_COMP(__NR_ptrace, sys_ptrace, compat_sys_ptrace)

   /* kernel/sched/core.c */
   #define __NR_sched_setparam 118
--
1.7.7



Hi Zhou,

Thank you for the catch.

Could you also show the test that is failed for you. It should
probably be sent to LTP maillist.

I'm not sure your fix correct as it affects other architectures that
use standard unistd.h. I think it's better to redirect the syscall in
arch/arm64/kernel/sys_ilp32.c with corresponding definition.

Yury

.



Sorry, I missed this mail. Thanks for your reply. :)
I attach the testcase file of ptrace that failed on ilp32.
I also think it's better to redirect the syscall in ilp32, so I changed
the patch.


Thanks for your patch. But Yury has already sent an new series this week
which define ptrace to compat one.

It seems that Yury do not take GET/SETSIGMASK into account. You
could share your test case and patches at this point.

Best wishes

Bamvor


Ok, I get it. So the new series can handle ptrace correctly. :)
But as for the GET/SETSIGMASK request, both ilp32 and aarch32 are wrong, 
because they are handled in ptrace_request(like aarch64). But I don't 
have a good way to correct it in all architectures.

The architectures that use compat_ptrace_request are:
arch/arm64, arch/mips, arch/parisc, arch/powerpc, arch/s390, arch/sparc,
arch/tile, arch/x86.
We have to use two architecture dependent functions in 
compat_ptrace_request to fix the GET/SETSIGMASK request. (as for 
arch/arm64, the two functions are: put_sigset_t and get_sigset_t)

So we maybe have to prepare these functions for all these architectures.
But I don't have much time to work on it. So I just fix it for ilp32 of 
arch/arm64 (Ah, easier way for me). I will put my patch of this fix 
under Yury's new series.


Thanks!



[1] https://www.mail-archive.com/linux-doc@vger.kernel.org/msg03811.html



 From 7e692ba1adf02c2a2f125836f5222f455c9ffe56 Mon Sep 17 00:00:00 2001
From: Zhou Chengming
Date: Sat, 25 Jun 2016 18:02:51 +0800
Subject: [PATCH] ilp32 should use compat_sys_ptrace

The file include/uapi/asm-generic/unistd.h defines this:
__SYSCALL(__NR_ptrace, sys_ptrace)
It may cause some ptrace tests failed on ilp32. So we redirect the ptrace
syscall in arch/arm64/kernel/sys_ilp32.c with corresponding definition.

Signed-off-by: Zhou Chengming
---
  arch/arm64/kernel/sys_ilp32.c |3 +++
  1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/arm64/kernel/sys_ilp32.c b/arch/arm64/kernel/sys_ilp32.c
index d85fe94..06d5e1b 100644
--- a/arch/arm64/kernel/sys_ilp32.c
+++ b/arch/arm64/kernel/sys_ilp32.c
@@ -46,6 +46,9 @@
  asmlinkage long ilp32_sys_rt_sigreturn_wrapper(void);
  #define compat_sys_rt_sigreturnilp32_sys_rt_sigreturn_wrapper

+/* ilp32 should use compat_sys_ptrace */
+#define sys_ptracecompat_sys_ptrace
+
  #include

  #undef __SYSCALL
--
1.7.7










--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] cgroup: pids: show number of failed forks since limit reset

2016-06-26 Thread Tejun Heo
Hello,

On Sun, Jun 26, 2016 at 09:34:41PM +1000, Aleksa Sarai wrote:
> If a user has a setup where they wait for notifications on changes to
> pids.event, and then auto-adjust the cgroup limits based on the number of
> failures you have a race condition between reading the pids.event file and
> then setting the new limit. Then, upon getting notified again there may have
> been many failed forks with the old limit set, so you might decide to bump
> up the limit again.
> 
> It's not a huge deal, I just though it could be useful to alleviate problems
> like the above.

This is something which can easily be avoided from userland.  I don't
think we need to add extra facilities for this.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] capabilities: add capability cgroup controller

2016-06-26 Thread Tejun Heo
Hello, Topi.

On Sun, Jun 26, 2016 at 3:14 PM, Topi Miettinen  wrote:
> The parent might be able do it if proc/pid/xyz files are still
> accessible after child exit but before its exit status is collected. But
> if the parent doesn't do it (and you are not able to change it to do it)
> and it collects the exit status without collecting other info, can you
> suggest a different way how another process could collect it 100% reliably?

I'm not saying that there's such mechanism now. I'm suggesting that
that'd be a more fitting way of implementing a new mechanism to track
capability usages.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] capabilities: add capability cgroup controller

2016-06-26 Thread Topi Miettinen
On 06/24/16 17:24, Tejun Heo wrote:
> Hello, Serge.
> 
> On Fri, Jun 24, 2016 at 11:59:10AM -0500, Serge E. Hallyn wrote:
>>> Just monitoring is less jarring than implementing security enforcement
>>> via cgroup, but it is still jarring.  What's wrong with recursive
>>> process hierarchy monitoring which is in line with the whole facility
>>> is implemented anyway?
>>
>> As I think Topi pointed out, one shortcoming is that if there is a 
>> short-lived
>> child task, using its /proc/self/status is racy.  You might just miss that it
>> ever even existed, let alone that the "application" needed it.
> 
> But the parent can collect whatever its children used.  We already do
> that with other stats.

The parent might be able do it if proc/pid/xyz files are still
accessible after child exit but before its exit status is collected. But
if the parent doesn't do it (and you are not able to change it to do it)
and it collects the exit status without collecting other info, can you
suggest a different way how another process could collect it 100% reliably?

-Topi

> 
> Thanks.
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] capabilities: add capability cgroup controller

2016-06-26 Thread Topi Miettinen
On 06/24/16 17:21, Eric W. Biederman wrote:
> "Serge E. Hallyn"  writes:
> 
>> Quoting Tejun Heo (t...@kernel.org):
>>> Hello,
>>>
>>> On Fri, Jun 24, 2016 at 10:59:16AM -0500, Serge E. Hallyn wrote:
 Quoting Tejun Heo (t...@kernel.org):
> But isn't being recursive orthogonal to using cgroup?  Why not account
> usages recursively along the process hierarchy?  Capabilities don't
> have much to do with cgroup but everything with process hierarchy.
> That's how they're distributed and modified.  If monitoring their
> usages is necessary, it makes sense to do it in the same structure.

 That was my argument against using cgroups to enforce a new bounding
 set.  For tracking though, the cgroup process tracking seems as applicable
 to this as it does to systemd tracking of services.  It tracks a task and
 the children it forks.
>>>
>>> Just monitoring is less jarring than implementing security enforcement
>>> via cgroup, but it is still jarring.  What's wrong with recursive
>>> process hierarchy monitoring which is in line with the whole facility
>>> is implemented anyway?
>>
>> As I think Topi pointed out, one shortcoming is that if there is a 
>> short-lived
>> child task, using its /proc/self/status is racy.  You might just miss that it
>> ever even existed, let alone that the "application" needed it.
>>
>> Another alternative we've both mentioned is to use systemtap.  That's not
>> as nice a solution as a cgroup, but then again this isn't really a common
>> case, so maybe it is precisely what a tracing infrastructure is meant for.
> 
> Hmm.
> 
> We have capability use wired up into auditing.  So we might be able to
> get away with just adding an appropriate audit message in
> commoncap.c:cap_capable that honors the audit flag and logs an audit
> message.  The hook in selinux already appears to do that.
> 
> Certainly audit sounds like the subsystem for this kind of work, as it's
> whole point in life is logging things, then something in userspace can
> just run over the audit longs and build a nice summary.

Even simpler would be to avoid the complexity of audit subsystem and
just printk() when a task starts using a capability first time (not on
further uses by same task). There are not that many capability bits nor
privileged processes, meaning not too many log entries. I know as this
was actually my first approach. But it's also far less user friendly
than just reading a summarized value which could be directly fed back to
configuration.

Logging/auditing approach also doesn't work well for other things I'd
like to present meaningful values for the user. For example, consider
RLIMIT_AS, where my goal is also to enable the users to be able to
configure this limit for a service. Should there be an audit message
whenever the address space limit grows (i.e. each mmap())? What about
when it shrinks? For RLIMIT_NOFILE we'd have to report each
open()/close()/dup()/socket()/etc. and track how many are opened at the
same time. I think it's better to store the fully cooked (meaningful to
user) value in kernel and present it only when asked.

-Topi

> 
> Eric
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] cgroup: pids: show number of failed forks since limit reset

2016-06-26 Thread Aleksa Sarai

On Fri, Jun 24, 2016 at 01:00:48PM +1000, Aleksa Sarai wrote:

This allows users to dynamically adjust their limits based on how many
failed forks happened since they last reset their limits, otherwise they
would have to track (in a racy way) how many limit failures there were
since the last limit change manually. In addition, we log the first
failure since the limit was reset (which was the original semantics of
the patchset).


Isn't that trivially achievable by reading the counter and then
calculating the diff?  I don't think it matters all that much whether
the log message is printed once per cgroup or per config-change.  It's
just a hint for the admin to avoid setting her off on a wild goose
chase.


If a user has a setup where they wait for notifications on changes to 
pids.event, and then auto-adjust the cgroup limits based on the number 
of failures you have a race condition between reading the pids.event 
file and then setting the new limit. Then, upon getting notified again 
there may have been many failed forks with the old limit set, so you 
might decide to bump up the limit again.


It's not a huge deal, I just though it could be useful to alleviate 
problems like the above.


--
Aleksa Sarai
Software Engineer (Containers)
SUSE Linux GmbH
https://www.cyphar.com/
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html