[PATCH] arch/arm: enable task isolation functionality

2016-09-22 Thread Francis Giraldeau
This patch is a port of the task isolation functionality to the arm 32-bit
architecture. The task isolation needs an additional thread flag that
requires to change the entry assembly code to accept a bitfield larger than
one byte.  The constants _TIF_SYSCALL_WORK and _TIF_WORK_MASK are now
defined in the literal pool. The rest of the patch is straightforward and
reflects what is done on other architectures.

Signed-off-by: Francis Giraldeau <francis.girald...@gmail.com>
---
 arch/arm/Kconfig   |  1 +
 arch/arm/include/asm/thread_info.h |  8 ++--
 arch/arm/kernel/entry-common.S | 15 ++-
 arch/arm/kernel/ptrace.c   | 10 ++
 arch/arm/kernel/signal.c   | 12 +++-
 arch/arm/kernel/smp.c  |  4 
 arch/arm/mm/fault.c|  9 -
 tools/testing/selftests/task_isolation/isolation.c | 14 ++
 8 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 018ee76..0b147e4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -40,6 +40,7 @@ config ARM
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
+   select HAVE_ARCH_TASK_ISOLATION
select HAVE_ARCH_TRACEHOOK
select HAVE_ARM_SMCCC if CPU_V7
select HAVE_CBPF_JIT
diff --git a/arch/arm/include/asm/thread_info.h 
b/arch/arm/include/asm/thread_info.h
index 776757d..c83ce56 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -145,6 +145,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user 
*,
 #define TIF_SECCOMP7   /* seccomp syscall filtering active */
 
 #define TIF_NOHZ   12  /* in adaptive nohz mode */
+#define TIF_TASK_ISOLATION 13  /* task isolation active */
 #define TIF_USING_IWMMXT   17
 #define TIF_MEMDIE 18  /* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK20
@@ -158,16 +159,19 @@ extern int vfp_restore_user_hwstate(struct user_vfp 
__user *,
 #define _TIF_SYSCALL_TRACEPOINT(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USING_IWMMXT  (1 << TIF_USING_IWMMXT)
+#define _TIF_TASK_ISOLATION(1 << TIF_TASK_ISOLATION)
 
 /* Checks for any syscall work in entry-common.S */
 #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
-  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
+  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+  _TIF_TASK_ISOLATION)
 
 /*
  * Change these and you break ASM code in entry-common.S
  */
 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-_TIF_NOTIFY_RESUME | _TIF_UPROBE)
+_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+_TIF_TASK_ISOLATION)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_ARM_THREAD_INFO_H */
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 10c3283..dd8c45b 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -36,7 +36,8 @@ ret_fast_syscall:
  UNWIND(.cantunwind)
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS]@ re-check for syscall tracing
-   tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+   ldr r2, =_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+   tst r1, r2
bne fast_work_pending
 
/* perform architecture specific actions before user return */
@@ -62,7 +63,8 @@ ret_fast_syscall:
str r0, [sp, #S_R0 + S_OFF]!@ save returned r0
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS]@ re-check for syscall tracing
-   tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+   ldr r2, =_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+   tst r1, r2
beq no_work_pending
  UNWIND(.fnend )
 ENDPROC(ret_fast_syscall)
@@ -70,7 +72,8 @@ ENDPROC(ret_fast_syscall)
/* Slower path - fall through to work_pending */
 #endif
 
-   tst r1, #_TIF_SYSCALL_WORK
+   ldr r2, =_TIF_SYSCALL_WORK
+   tst r1, r2
bne __sys_trace_return_nosave
 slow_work_pending:
mov r0, sp  @ 'regs'
@@ -94,7 +97,8 @@ ret_slow_syscall:
disable_irq_notrace @ disable interrupts
 ENTRY(ret_to_user_from_irq)
ldr r1, [tsk, #TI_FLAGS]
-   tst r1, #_TIF_WORK_MASK
+   ldr r2, =_TIF_WORK_MASK
+   tst r1, r2
bne slow_work_pending
 

[PATCH] arch/arm: enable task isolation functionality

2016-09-22 Thread Francis Giraldeau
This patch is a port of the task isolation functionality to the arm 32-bit
architecture. The task isolation needs an additional thread flag that
requires to change the entry assembly code to accept a bitfield larger than
one byte.  The constants _TIF_SYSCALL_WORK and _TIF_WORK_MASK are now
defined in the literal pool. The rest of the patch is straightforward and
reflects what is done on other architectures.

Signed-off-by: Francis Giraldeau 
---
 arch/arm/Kconfig   |  1 +
 arch/arm/include/asm/thread_info.h |  8 ++--
 arch/arm/kernel/entry-common.S | 15 ++-
 arch/arm/kernel/ptrace.c   | 10 ++
 arch/arm/kernel/signal.c   | 12 +++-
 arch/arm/kernel/smp.c  |  4 
 arch/arm/mm/fault.c|  9 -
 tools/testing/selftests/task_isolation/isolation.c | 14 ++
 8 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 018ee76..0b147e4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -40,6 +40,7 @@ config ARM
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
+   select HAVE_ARCH_TASK_ISOLATION
select HAVE_ARCH_TRACEHOOK
select HAVE_ARM_SMCCC if CPU_V7
select HAVE_CBPF_JIT
diff --git a/arch/arm/include/asm/thread_info.h 
b/arch/arm/include/asm/thread_info.h
index 776757d..c83ce56 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -145,6 +145,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user 
*,
 #define TIF_SECCOMP7   /* seccomp syscall filtering active */
 
 #define TIF_NOHZ   12  /* in adaptive nohz mode */
+#define TIF_TASK_ISOLATION 13  /* task isolation active */
 #define TIF_USING_IWMMXT   17
 #define TIF_MEMDIE 18  /* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK20
@@ -158,16 +159,19 @@ extern int vfp_restore_user_hwstate(struct user_vfp 
__user *,
 #define _TIF_SYSCALL_TRACEPOINT(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SECCOMP   (1 << TIF_SECCOMP)
 #define _TIF_USING_IWMMXT  (1 << TIF_USING_IWMMXT)
+#define _TIF_TASK_ISOLATION(1 << TIF_TASK_ISOLATION)
 
 /* Checks for any syscall work in entry-common.S */
 #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
-  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
+  _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+  _TIF_TASK_ISOLATION)
 
 /*
  * Change these and you break ASM code in entry-common.S
  */
 #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
-_TIF_NOTIFY_RESUME | _TIF_UPROBE)
+_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+_TIF_TASK_ISOLATION)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_ARM_THREAD_INFO_H */
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 10c3283..dd8c45b 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -36,7 +36,8 @@ ret_fast_syscall:
  UNWIND(.cantunwind)
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS]@ re-check for syscall tracing
-   tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+   ldr r2, =_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+   tst r1, r2
bne fast_work_pending
 
/* perform architecture specific actions before user return */
@@ -62,7 +63,8 @@ ret_fast_syscall:
str r0, [sp, #S_R0 + S_OFF]!@ save returned r0
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS]@ re-check for syscall tracing
-   tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+   ldr r2, =_TIF_SYSCALL_WORK | _TIF_WORK_MASK
+   tst r1, r2
beq no_work_pending
  UNWIND(.fnend )
 ENDPROC(ret_fast_syscall)
@@ -70,7 +72,8 @@ ENDPROC(ret_fast_syscall)
/* Slower path - fall through to work_pending */
 #endif
 
-   tst r1, #_TIF_SYSCALL_WORK
+   ldr r2, =_TIF_SYSCALL_WORK
+   tst r1, r2
bne __sys_trace_return_nosave
 slow_work_pending:
mov r0, sp  @ 'regs'
@@ -94,7 +97,8 @@ ret_slow_syscall:
disable_irq_notrace @ disable interrupts
 ENTRY(ret_to_user_from_irq)
ldr r1, [tsk, #TI_FLAGS]
-   tst r1, #_TIF_WORK_MASK
+   ldr r2, =_TIF_WORK_MASK
+   tst r1, r2
bne slow_work_pending
 no_work_pending:
asm_tra

Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-13 Thread Francis Giraldeau
On 2016-09-12 08:05 PM, Rafael J. Wysocki wrote:
> On Monday, September 12, 2016 11:15:45 PM Rafael J. Wysocki wrote:
>> On Monday, September 12, 2016 06:14:44 PM Peter Zijlstra wrote:
>>> On Mon, Sep 12, 2016 at 12:01:58PM -0400, Chris Metcalf wrote:
>>>> On 9/7/2016 5:11 PM, Francis Giraldeau wrote:
>>>>> When running only the test_jitter(), the isolation mode is lost:
>>>>>
>>>>> [ 6741.566048] isolation/9515: task_isolation mode lost due to 
>>>>> irq_work
>>>>>
>>>>> With ftrace (events/workqueue/workqueue_execute_start), I get a bit more 
>>>>> info:
>>>>>
>>>>>  kworker/1:1-676   [001]   6610.097128: workqueue_execute_start: 
>>>>> work struct 8801a784ca20: function dbs_work_handler
>>>>>
>>>>> The governor was ondemand, so I tried to set the frequency scaling
>>>>> governor to performance, but that does not solve the issue.
>>>
>>> Rafael, I'm thinking the performance governor should be able to run
>>> without sending IPIs. Is there anything we can quickly do about that?
>>
>> The performance governor doesn't do any IPIs.
>>
>> At this point I'm not sure what's going on.
> 
> I've just tried and switching over to the performance governor makes
> dbs_work_handler go away for me (w/ -rc4 with some extra irrelevant patches
> on top) as it should.

Ho gosh, the command "cpufreq-set -g performance" sets the governor only
for cpu0. I was expecting it to set for all cpus. The isolation test runs
on cpu1 and this cpu was still with ondemand governor. With performance
governor, the dbs_work_handler does not occurs anymore and the isolated
task is not preempted by that kworker thread anymore.

Sorry for the noise and thanks for checking!

Francis


Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-13 Thread Francis Giraldeau
On 2016-09-12 08:05 PM, Rafael J. Wysocki wrote:
> On Monday, September 12, 2016 11:15:45 PM Rafael J. Wysocki wrote:
>> On Monday, September 12, 2016 06:14:44 PM Peter Zijlstra wrote:
>>> On Mon, Sep 12, 2016 at 12:01:58PM -0400, Chris Metcalf wrote:
>>>> On 9/7/2016 5:11 PM, Francis Giraldeau wrote:
>>>>> When running only the test_jitter(), the isolation mode is lost:
>>>>>
>>>>> [ 6741.566048] isolation/9515: task_isolation mode lost due to 
>>>>> irq_work
>>>>>
>>>>> With ftrace (events/workqueue/workqueue_execute_start), I get a bit more 
>>>>> info:
>>>>>
>>>>>  kworker/1:1-676   [001]   6610.097128: workqueue_execute_start: 
>>>>> work struct 8801a784ca20: function dbs_work_handler
>>>>>
>>>>> The governor was ondemand, so I tried to set the frequency scaling
>>>>> governor to performance, but that does not solve the issue.
>>>
>>> Rafael, I'm thinking the performance governor should be able to run
>>> without sending IPIs. Is there anything we can quickly do about that?
>>
>> The performance governor doesn't do any IPIs.
>>
>> At this point I'm not sure what's going on.
> 
> I've just tried and switching over to the performance governor makes
> dbs_work_handler go away for me (w/ -rc4 with some extra irrelevant patches
> on top) as it should.

Ho gosh, the command "cpufreq-set -g performance" sets the governor only
for cpu0. I was expecting it to set for all cpus. The isolation test runs
on cpu1 and this cpu was still with ondemand governor. With performance
governor, the dbs_work_handler does not occurs anymore and the isolated
task is not preempted by that kworker thread anymore.

Sorry for the noise and thanks for checking!

Francis


Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-12 Thread Francis Giraldeau
On 2016-09-12 12:01 PM, Chris Metcalf wrote:
> The syscall test fails on x86:
>>
>>  $ sudo ./isolation
>>  [...]
>>  test_syscall: FAIL (0x100)
>>  test_syscall (SIGUSR1): FAIL (0x100) 
>
> Your next email suggested adding TIF_TASK_ISOLATION to the set of
> flags in _TIF_WORK_SYSCALL_ENTRY.  I'm happy to make this change
> regardless (it's consistent with Andy's request to add the task
> isolation flag to _TIF_ALLWORK_MASK), but I'm puzzled: as far as
> I know there is no way for TIF_TASK_ISOLATION to be set unless
> TIF_NOHZ is also set.  The context_tracking_init() code forces TIF_NOHZ
> on for every task during boot up, and nothing ever clears it, so...
>

Hello!

You are right, on entry to syscall_trace_enter() the flags is
(_TIF_NOHZ | _TIF_TASK_ISOLATION):

[   22.634988] isolation thread flags: 0x82000

But at linux/arch/x86/entry/common.c:83

work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;

the flag _TIF_TASK_ISOLATION was cleared because it is not included in
_TIF_WORK_SYSCALL_ENTRY. Then, the test below is always false:

if (work & _TIF_TASK_ISOLATION) {
if (task_isolation_syscall(regs->orig_ax) == -1)
return -1L;
work &= ~_TIF_TASK_ISOLATION;
}

To fix the issue, _TIF_TASK_ISOLATION must be in _TIF_WORK_SYSCALL_ENTRY.
It works on arm64 because the flags are used directly without a mask applied.

>> BTW, this was causing the test to enter an infinite loop. If the clock
>> source is not reliable, maybe a different error code should be returned,
>> because this situation not transient. 
>
> That's a good idea - do you know what the check should be in that
> case?  We can just return EINVAL, as you suggest. 

The args are valid, but the system has an unstable clock, therefore the
operation is not supported. In the user point of view, maybe ENOTSUPP
would be more appropriate? But then, we need to check the reason and
can_stop_my_full_tick() returns only a boolean.

On a side note, the NOSIG mode may be confusing for the users. At first,
I was expecting that NOSIG behaves the same way as the normal task isolation
mode. In the current situation, if the user wants the normal behavior, but
does not care about the signal, the user must register an empty signal handler.

However, if I understand correctly, other settings beside NOHZ and isolcpus
are required to support quiet CPUs, such as irq_affinity and rcu_nocb. It would
be very convenient from the user point of view if these other settings were 
configure
correctly.

I can work on that and also write some doc (Documentation/task-isolation.txt ?).

> Thanks a lot for your help! 

Many thanks for your feedback,

Francis



Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-12 Thread Francis Giraldeau
On 2016-09-12 12:01 PM, Chris Metcalf wrote:
> The syscall test fails on x86:
>>
>>  $ sudo ./isolation
>>  [...]
>>  test_syscall: FAIL (0x100)
>>  test_syscall (SIGUSR1): FAIL (0x100) 
>
> Your next email suggested adding TIF_TASK_ISOLATION to the set of
> flags in _TIF_WORK_SYSCALL_ENTRY.  I'm happy to make this change
> regardless (it's consistent with Andy's request to add the task
> isolation flag to _TIF_ALLWORK_MASK), but I'm puzzled: as far as
> I know there is no way for TIF_TASK_ISOLATION to be set unless
> TIF_NOHZ is also set.  The context_tracking_init() code forces TIF_NOHZ
> on for every task during boot up, and nothing ever clears it, so...
>

Hello!

You are right, on entry to syscall_trace_enter() the flags is
(_TIF_NOHZ | _TIF_TASK_ISOLATION):

[   22.634988] isolation thread flags: 0x82000

But at linux/arch/x86/entry/common.c:83

work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;

the flag _TIF_TASK_ISOLATION was cleared because it is not included in
_TIF_WORK_SYSCALL_ENTRY. Then, the test below is always false:

if (work & _TIF_TASK_ISOLATION) {
if (task_isolation_syscall(regs->orig_ax) == -1)
return -1L;
work &= ~_TIF_TASK_ISOLATION;
}

To fix the issue, _TIF_TASK_ISOLATION must be in _TIF_WORK_SYSCALL_ENTRY.
It works on arm64 because the flags are used directly without a mask applied.

>> BTW, this was causing the test to enter an infinite loop. If the clock
>> source is not reliable, maybe a different error code should be returned,
>> because this situation not transient. 
>
> That's a good idea - do you know what the check should be in that
> case?  We can just return EINVAL, as you suggest. 

The args are valid, but the system has an unstable clock, therefore the
operation is not supported. In the user point of view, maybe ENOTSUPP
would be more appropriate? But then, we need to check the reason and
can_stop_my_full_tick() returns only a boolean.

On a side note, the NOSIG mode may be confusing for the users. At first,
I was expecting that NOSIG behaves the same way as the normal task isolation
mode. In the current situation, if the user wants the normal behavior, but
does not care about the signal, the user must register an empty signal handler.

However, if I understand correctly, other settings beside NOHZ and isolcpus
are required to support quiet CPUs, such as irq_affinity and rcu_nocb. It would
be very convenient from the user point of view if these other settings were 
configure
correctly.

I can work on that and also write some doc (Documentation/task-isolation.txt ?).

> Thanks a lot for your help! 

Many thanks for your feedback,

Francis



Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-08 Thread Francis Giraldeau
On 2016-09-07 05:11 PM, Francis Giraldeau wrote:
> The syscall test fails on x86:
> $ sudo ./isolation
> [...]
> test_syscall: FAIL (0x100)
> test_syscall (SIGUSR1): FAIL (0x100)

The fix is indeed very simple:

diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 7255367..449e2b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -139,7 +139,7 @@ struct thread_info {
 #define _TIF_WORK_SYSCALL_ENTRY\
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |   \
 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT |   \
-_TIF_NOHZ)
+_TIF_NOHZ | _TIF_TASK_ISOLATION)
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK  \


I updated the branch accordingly:

https://github.com/giraldeau/linux/commit/057761af7bde087fa10aa42554a13a7b69071938

Thanks,

Francis


Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-08 Thread Francis Giraldeau
On 2016-09-07 05:11 PM, Francis Giraldeau wrote:
> The syscall test fails on x86:
> $ sudo ./isolation
> [...]
> test_syscall: FAIL (0x100)
> test_syscall (SIGUSR1): FAIL (0x100)

The fix is indeed very simple:

diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 7255367..449e2b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -139,7 +139,7 @@ struct thread_info {
 #define _TIF_WORK_SYSCALL_ENTRY\
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |   \
 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT |   \
-_TIF_NOHZ)
+_TIF_NOHZ | _TIF_TASK_ISOLATION)
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK  \


I updated the branch accordingly:

https://github.com/giraldeau/linux/commit/057761af7bde087fa10aa42554a13a7b69071938

Thanks,

Francis


Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-07 Thread Francis Giraldeau
On 2016-09-07 05:11 PM, Francis Giraldeau wrote:
> The syscall test fails on x86:
> $ sudo ./isolation
> [...]
> test_syscall: FAIL (0x100)
> test_syscall (SIGUSR1): FAIL (0x100)
>
> I wanted to debug this problem with gdb and a KVM virtual machine. However, 
> the TSC clock source is detected as non reliable, even with the boot 
> parameter tsc=reliable, and therefore prctl(PR_SET_TASK_ISOLATION, 
> PR_TASK_ISOLATION_ENABLE) always returns EAGAIN. Is there a trick to run task 
> isolation in a VM (at least for debugging purposes)?

OK, got it. The guest kernel must be compiled with CONFIG_KVM_GUEST, and then 
with virsh edit, set the clock configuration of the VM (under ):

 

  

Of course, the jitter is horrible, but at least it is possible to debug with 
GDB.

Cheers,

Francis


Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-07 Thread Francis Giraldeau
On 2016-09-07 05:11 PM, Francis Giraldeau wrote:
> The syscall test fails on x86:
> $ sudo ./isolation
> [...]
> test_syscall: FAIL (0x100)
> test_syscall (SIGUSR1): FAIL (0x100)
>
> I wanted to debug this problem with gdb and a KVM virtual machine. However, 
> the TSC clock source is detected as non reliable, even with the boot 
> parameter tsc=reliable, and therefore prctl(PR_SET_TASK_ISOLATION, 
> PR_TASK_ISOLATION_ENABLE) always returns EAGAIN. Is there a trick to run task 
> isolation in a VM (at least for debugging purposes)?

OK, got it. The guest kernel must be compiled with CONFIG_KVM_GUEST, and then 
with virsh edit, set the clock configuration of the VM (under ):

 

  

Of course, the jitter is horrible, but at least it is possible to debug with 
GDB.

Cheers,

Francis


Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-07 Thread Francis Giraldeau
On 2016-08-29 12:27 PM, Chris Metcalf wrote:
> On 8/16/2016 5:19 PM, Chris Metcalf wrote:
>> Here is a respin of the task-isolation patch set.
>
> No concerns have been raised yet with the v15 version of the patch series
> in the two weeks since I posted it, and I think I have addressed all
> previously-raised concerns (or perhaps people have just given up arguing
> with me).

There is a cycle with header include in the v15 patch set on x86_64 that cause 
a compilation error. You will find the patch (and other fixes) in the following 
branch:

https://github.com/giraldeau/linux/commits/dataplane-x86-fix-inc

In the test file, it is indicated to change timer-tick.c to disable the 1Hz 
tick, is there a reason why the change is not in the patch set? I added this 
trivial change in the branch dataplane-x86-fix-inc above.

The syscall test fails on x86:

$ sudo ./isolation
[...]
test_syscall: FAIL (0x100)
test_syscall (SIGUSR1): FAIL (0x100)

I wanted to debug this problem with gdb and a KVM virtual machine. However, the 
TSC clock source is detected as non reliable, even with the boot parameter 
tsc=reliable, and therefore prctl(PR_SET_TASK_ISOLATION, 
PR_TASK_ISOLATION_ENABLE) always returns EAGAIN. Is there a trick to run task 
isolation in a VM (at least for debugging purposes)?

BTW, this was causing the test to enter an infinite loop. If the clock source 
is not reliable, maybe a different error code should be returned, because this 
situation not transient. In the mean time, I added a check for 100 max retries 
in the test (prctl_safe()).

When running only the test_jitter(), the isolation mode is lost:

[ 6741.566048] isolation/9515: task_isolation mode lost due to irq_work

With ftrace (events/workqueue/workqueue_execute_start), I get a bit more info:

 kworker/1:1-676   [001]   6610.097128: workqueue_execute_start: work 
struct 8801a784ca20: function dbs_work_handler

The governor was ondemand, so I tried to set the frequency scaling governor to 
performance, but that does not solve the issue. Is there a way to suppress this 
irq_work? Should we run the isolated task with high real-time priority, such 
that it never get preempted?

Thanks!

Francis


Re: Ping: [PATCH v15 00/13] support "task_isolation" mode

2016-09-07 Thread Francis Giraldeau
On 2016-08-29 12:27 PM, Chris Metcalf wrote:
> On 8/16/2016 5:19 PM, Chris Metcalf wrote:
>> Here is a respin of the task-isolation patch set.
>
> No concerns have been raised yet with the v15 version of the patch series
> in the two weeks since I posted it, and I think I have addressed all
> previously-raised concerns (or perhaps people have just given up arguing
> with me).

There is a cycle with header include in the v15 patch set on x86_64 that cause 
a compilation error. You will find the patch (and other fixes) in the following 
branch:

https://github.com/giraldeau/linux/commits/dataplane-x86-fix-inc

In the test file, it is indicated to change timer-tick.c to disable the 1Hz 
tick, is there a reason why the change is not in the patch set? I added this 
trivial change in the branch dataplane-x86-fix-inc above.

The syscall test fails on x86:

$ sudo ./isolation
[...]
test_syscall: FAIL (0x100)
test_syscall (SIGUSR1): FAIL (0x100)

I wanted to debug this problem with gdb and a KVM virtual machine. However, the 
TSC clock source is detected as non reliable, even with the boot parameter 
tsc=reliable, and therefore prctl(PR_SET_TASK_ISOLATION, 
PR_TASK_ISOLATION_ENABLE) always returns EAGAIN. Is there a trick to run task 
isolation in a VM (at least for debugging purposes)?

BTW, this was causing the test to enter an infinite loop. If the clock source 
is not reliable, maybe a different error code should be returned, because this 
situation not transient. In the mean time, I added a check for 100 max retries 
in the test (prctl_safe()).

When running only the test_jitter(), the isolation mode is lost:

[ 6741.566048] isolation/9515: task_isolation mode lost due to irq_work

With ftrace (events/workqueue/workqueue_execute_start), I get a bit more info:

 kworker/1:1-676   [001]   6610.097128: workqueue_execute_start: work 
struct 8801a784ca20: function dbs_work_handler

The governor was ondemand, so I tried to set the frequency scaling governor to 
performance, but that does not solve the issue. Is there a way to suppress this 
irq_work? Should we run the isolated task with high real-time priority, such 
that it never get preempted?

Thanks!

Francis


Re: clocksource_watchdog causing scheduling of timers every second (was [v13] support "task_isolation" mode)

2016-07-29 Thread Francis Giraldeau
I tested this patch on 4.7 and confirm that irq_work does not occurs anymore on
the isolated cpu. Thanks!

I don't know of any utility to test the task isolation feature, so I started
one:

https://github.com/giraldeau/taskisol

The script exp.sh runs the taskisol to test five different conditions, but some
behavior is not the one I would expect.

At startup, it does:
 - register a custom signal handler for SIGUSR1
 - sched_setaffinity() on CPU 1, which is isolated
 - mlockall(MCL_CURRENT) to prevent undesired page faults

The default strict mode is set with:

prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_ENABLE)

And then, the syscall write() is called. From previous discussion, the SIGKILL
should be sent, but it does not occur. When instead of calling write() we force
a page fault, then the SIGKILL is correctly sent.

When instead a custom signal handler SIGUSR1:

prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_USERSIG |
  PR_TASK_ISOLATION_SET_SIG(SIGUSR1)

The signal is never delivered, either when the syscall is issued nor when the
page fault occurs.

I can confirm that, if two taskisol are created on the same CPU, the second one
fails with Resource temporarily unavailable, so that's fine.

I can add more test cases depending on your comments, such as the TLB events
triggered by another thread on a non-isolated core. But maybe there is already
a test suite?

Francis

2016-07-27 15:58 GMT-04:00 Chris Metcalf :
> On 7/27/2016 3:53 PM, Christoph Lameter wrote:
>>
>> On Wed, 27 Jul 2016, Chris Metcalf wrote:
>>
>>> Looks good.  Did you omit the equivalent fix in
>>> clocksource_start_watchdog()
>>> on purpose?  For now I just took your change, but tweaked it to add the
>>> equivalent diff with cpumask_first_and() there.
>>
>> Can the watchdog be started on an isolated cpu at all? I would expect that
>> the code would start a watchdog only on a housekeeping cpu.
>
>
> The code just starts the watchdog initially on the first online cpu.
> In principle you could have configured that as an isolated cpu, so
> without any change to that code, you'd interrupt that cpu.
>
> I guess another way to slice it would be to start the watchdog on the
> current core.  But just using the same idiom as in clocksource_watchdog()
> seems cleanest to me.
>
> I added your patch to the series and pushed it up (along with adding your
> Tested-by to the x86 enablement commit).  It's still based on 4.6 so I'll
> need
> to rebase it once the merge window closes.
>
>
> --
> Chris Metcalf, Mellanox Technologies
> http://www.mellanox.com
>


Re: clocksource_watchdog causing scheduling of timers every second (was [v13] support "task_isolation" mode)

2016-07-29 Thread Francis Giraldeau
I tested this patch on 4.7 and confirm that irq_work does not occurs anymore on
the isolated cpu. Thanks!

I don't know of any utility to test the task isolation feature, so I started
one:

https://github.com/giraldeau/taskisol

The script exp.sh runs the taskisol to test five different conditions, but some
behavior is not the one I would expect.

At startup, it does:
 - register a custom signal handler for SIGUSR1
 - sched_setaffinity() on CPU 1, which is isolated
 - mlockall(MCL_CURRENT) to prevent undesired page faults

The default strict mode is set with:

prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_ENABLE)

And then, the syscall write() is called. From previous discussion, the SIGKILL
should be sent, but it does not occur. When instead of calling write() we force
a page fault, then the SIGKILL is correctly sent.

When instead a custom signal handler SIGUSR1:

prctl(PR_SET_TASK_ISOLATION, PR_TASK_ISOLATION_USERSIG |
  PR_TASK_ISOLATION_SET_SIG(SIGUSR1)

The signal is never delivered, either when the syscall is issued nor when the
page fault occurs.

I can confirm that, if two taskisol are created on the same CPU, the second one
fails with Resource temporarily unavailable, so that's fine.

I can add more test cases depending on your comments, such as the TLB events
triggered by another thread on a non-isolated core. But maybe there is already
a test suite?

Francis

2016-07-27 15:58 GMT-04:00 Chris Metcalf :
> On 7/27/2016 3:53 PM, Christoph Lameter wrote:
>>
>> On Wed, 27 Jul 2016, Chris Metcalf wrote:
>>
>>> Looks good.  Did you omit the equivalent fix in
>>> clocksource_start_watchdog()
>>> on purpose?  For now I just took your change, but tweaked it to add the
>>> equivalent diff with cpumask_first_and() there.
>>
>> Can the watchdog be started on an isolated cpu at all? I would expect that
>> the code would start a watchdog only on a housekeeping cpu.
>
>
> The code just starts the watchdog initially on the first online cpu.
> In principle you could have configured that as an isolated cpu, so
> without any change to that code, you'd interrupt that cpu.
>
> I guess another way to slice it would be to start the watchdog on the
> current core.  But just using the same idiom as in clocksource_watchdog()
> seems cleanest to me.
>
> I added your patch to the series and pushed it up (along with adding your
> Tested-by to the x86 enablement commit).  It's still based on 4.6 so I'll
> need
> to rebase it once the merge window closes.
>
>
> --
> Chris Metcalf, Mellanox Technologies
> http://www.mellanox.com
>


[PATCH] Prevent infinite loop if unwind fails

2013-11-22 Thread Francis Giraldeau
Unwind may loop forever if sample is incomplete or bogus. Bound the number of
unw_step() that yield the same IP to prevent infinite loop.

Signed-off-by: Francis Giraldeau 
---
 tools/perf/util/unwind.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c
index 958723b..bf45b6b 100644
--- a/tools/perf/util/unwind.c
+++ b/tools/perf/util/unwind.c
@@ -520,7 +520,8 @@ static int get_entries(struct unwind_info *ui, 
unwind_entry_cb_t cb,
 {
unw_addr_space_t addr_space;
unw_cursor_t c;
-   int ret;
+   unw_word_t prev_ip = 0;
+   int ret, rec = 0;
 
addr_space = unw_create_addr_space(, 0);
if (!addr_space) {
@@ -537,6 +538,11 @@ static int get_entries(struct unwind_info *ui, 
unwind_entry_cb_t cb,
 
unw_get_reg(, UNW_REG_IP, );
ret = entry(ip, ui->thread, ui->machine, cb, arg);
+   if (prev_ip != ip)
+   rec = 0;
+   if (++rec >= 100)
+   break;
+   prev_ip = ip;
}
 
unw_destroy_addr_space(addr_space);
-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Prevent infinite loop if unwind fails

2013-11-22 Thread Francis Giraldeau
Unwind may loop forever if sample is incomplete or bogus. Bound the number of
unw_step() that yield the same IP to prevent infinite loop.

Signed-off-by: Francis Giraldeau francis.girald...@gmail.com
---
 tools/perf/util/unwind.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c
index 958723b..bf45b6b 100644
--- a/tools/perf/util/unwind.c
+++ b/tools/perf/util/unwind.c
@@ -520,7 +520,8 @@ static int get_entries(struct unwind_info *ui, 
unwind_entry_cb_t cb,
 {
unw_addr_space_t addr_space;
unw_cursor_t c;
-   int ret;
+   unw_word_t prev_ip = 0;
+   int ret, rec = 0;
 
addr_space = unw_create_addr_space(accessors, 0);
if (!addr_space) {
@@ -537,6 +538,11 @@ static int get_entries(struct unwind_info *ui, 
unwind_entry_cb_t cb,
 
unw_get_reg(c, UNW_REG_IP, ip);
ret = entry(ip, ui-thread, ui-machine, cb, arg);
+   if (prev_ip != ip)
+   rec = 0;
+   if (++rec = 100)
+   break;
+   prev_ip = ip;
}
 
unw_destroy_addr_space(addr_space);
-- 
1.8.1.2

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/