Re: [RFC PATCH 2/2] pipe: use pipe busy wait

2018-09-17 Thread Subhra Mazumdar




On 09/17/2018 03:43 PM, Peter Zijlstra wrote:

On Mon, Sep 17, 2018 at 02:05:40PM -0700, Subhra Mazumdar wrote:

On 09/07/2018 05:25 AM, Peter Zijlstra wrote:

Why not just busy wait on current->state ? A little something like:

diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c0977d..8d9f1c95ff99 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -106,6 +106,7 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
  void pipe_wait(struct pipe_inode_info *pipe)
  {
DEFINE_WAIT(wait);
+   u64 start;
/*
 * Pipes are system-local resources, so sleeping on them
@@ -113,7 +114,15 @@ void pipe_wait(struct pipe_inode_info *pipe)
 */
prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
pipe_unlock(pipe);
-   schedule();
+
+   preempt_disable();
+   start = local_clock();
+   while (!need_resched() && current->state != TASK_RUNNING &&
+   (local_clock() - start) < pipe->poll_usec)
+   cpu_relax();
+   schedule_preempt_disabled();
+   preempt_enable();
+
finish_wait(&pipe->wait, &wait);
pipe_lock(pipe);
  }

This will make the current thread always spin and block as it itself does
the state change to TASK_RUNNING in finish_wait.

Nah, the actual wakeup will also do that state change. The one in
finish_wait() is for the case where the wait condition became true
without wakeup, such that we don't 'leak' the INTERRUPTIBLE state.

Ok, it works. I see similar improvements with hackbench as the original
patch.


Re: [RFC PATCH 2/2] pipe: use pipe busy wait

2018-09-17 Thread Peter Zijlstra
On Mon, Sep 17, 2018 at 02:05:40PM -0700, Subhra Mazumdar wrote:
> On 09/07/2018 05:25 AM, Peter Zijlstra wrote:

> >Why not just busy wait on current->state ? A little something like:
> >
> >diff --git a/fs/pipe.c b/fs/pipe.c
> >index bdc5d3c0977d..8d9f1c95ff99 100644
> >--- a/fs/pipe.c
> >+++ b/fs/pipe.c
> >@@ -106,6 +106,7 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
> >  void pipe_wait(struct pipe_inode_info *pipe)
> >  {
> > DEFINE_WAIT(wait);
> >+u64 start;
> > /*
> >  * Pipes are system-local resources, so sleeping on them
> >@@ -113,7 +114,15 @@ void pipe_wait(struct pipe_inode_info *pipe)
> >  */
> > prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
> > pipe_unlock(pipe);
> >-schedule();
> >+
> >+preempt_disable();
> >+start = local_clock();
> >+while (!need_resched() && current->state != TASK_RUNNING &&
> >+(local_clock() - start) < pipe->poll_usec)
> >+cpu_relax();
> >+schedule_preempt_disabled();
> >+preempt_enable();
> >+
> > finish_wait(&pipe->wait, &wait);
> > pipe_lock(pipe);
> >  }

> This will make the current thread always spin and block as it itself does
> the state change to TASK_RUNNING in finish_wait.

Nah, the actual wakeup will also do that state change. The one in
finish_wait() is for the case where the wait condition became true
without wakeup, such that we don't 'leak' the INTERRUPTIBLE state.


Re: [RFC PATCH 2/2] pipe: use pipe busy wait

2018-09-17 Thread Subhra Mazumdar




On 09/07/2018 05:25 AM, Peter Zijlstra wrote:

On Thu, Aug 30, 2018 at 01:24:58PM -0700, subhra mazumdar wrote:

+void pipe_busy_wait(struct pipe_inode_info *pipe)
+{
+   unsigned long wait_flag = pipe->pipe_wait_flag;
+   unsigned long start_time = pipe_busy_loop_current_time();
+
+   pipe_unlock(pipe);
+   preempt_disable();
+   for (;;) {
+   if (pipe->pipe_wait_flag > wait_flag) {
+   preempt_enable();
+   pipe_lock(pipe);
+   return;
+   }
+   if (pipe_busy_loop_timeout(pipe, start_time))
+   break;
+   cpu_relax();
+   }
+   preempt_enable();
+   pipe_lock(pipe);
+   if (pipe->pipe_wait_flag > wait_flag)
+   return;
+   pipe_wait(pipe);
+}
+
+void wake_up_busy_poll(struct pipe_inode_info *pipe)
+{
+   pipe->pipe_wait_flag++;
+}

Why not just busy wait on current->state ? A little something like:

diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c0977d..8d9f1c95ff99 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -106,6 +106,7 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
  void pipe_wait(struct pipe_inode_info *pipe)
  {
DEFINE_WAIT(wait);
+   u64 start;
  
  	/*

 * Pipes are system-local resources, so sleeping on them
@@ -113,7 +114,15 @@ void pipe_wait(struct pipe_inode_info *pipe)
 */
prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
pipe_unlock(pipe);
-   schedule();
+
+   preempt_disable();
+   start = local_clock();
+   while (!need_resched() && current->state != TASK_RUNNING &&
+   (local_clock() - start) < pipe->poll_usec)
+   cpu_relax();
+   schedule_preempt_disabled();
+   preempt_enable();
+
finish_wait(&pipe->wait, &wait);
pipe_lock(pipe);
  }

This will make the current thread always spin and block as it itself does
the state change to TASK_RUNNING in finish_wait.


Re: [RFC PATCH 2/2] pipe: use pipe busy wait

2018-09-07 Thread Peter Zijlstra
On Thu, Aug 30, 2018 at 01:24:58PM -0700, subhra mazumdar wrote:
> +void pipe_busy_wait(struct pipe_inode_info *pipe)
> +{
> + unsigned long wait_flag = pipe->pipe_wait_flag;
> + unsigned long start_time = pipe_busy_loop_current_time();
> +
> + pipe_unlock(pipe);
> + preempt_disable();
> + for (;;) {
> + if (pipe->pipe_wait_flag > wait_flag) {
> + preempt_enable();
> + pipe_lock(pipe);
> + return;
> + }
> + if (pipe_busy_loop_timeout(pipe, start_time))
> + break;
> + cpu_relax();
> + }
> + preempt_enable();
> + pipe_lock(pipe);
> + if (pipe->pipe_wait_flag > wait_flag)
> + return;
> + pipe_wait(pipe);
> +}
> +
> +void wake_up_busy_poll(struct pipe_inode_info *pipe)
> +{
> + pipe->pipe_wait_flag++;
> +}

Why not just busy wait on current->state ? A little something like:

diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c0977d..8d9f1c95ff99 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -106,6 +106,7 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
 void pipe_wait(struct pipe_inode_info *pipe)
 {
DEFINE_WAIT(wait);
+   u64 start;
 
/*
 * Pipes are system-local resources, so sleeping on them
@@ -113,7 +114,15 @@ void pipe_wait(struct pipe_inode_info *pipe)
 */
prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
pipe_unlock(pipe);
-   schedule();
+
+   preempt_disable();
+   start = local_clock();
+   while (!need_resched() && current->state != TASK_RUNNING &&
+   (local_clock() - start) < pipe->poll_usec)
+   cpu_relax();
+   schedule_preempt_disabled();
+   preempt_enable();
+
finish_wait(&pipe->wait, &wait);
pipe_lock(pipe);
 }


Re: [RFC PATCH 2/2] pipe: use pipe busy wait

2018-09-04 Thread Subhra Mazumdar




On 09/04/2018 02:54 PM, Thomas Gleixner wrote:

On Thu, 30 Aug 2018, subhra mazumdar wrote:
  
+void pipe_busy_wait(struct pipe_inode_info *pipe)

+{
+   unsigned long wait_flag = pipe->pipe_wait_flag;
+   unsigned long start_time = pipe_busy_loop_current_time();
+
+   pipe_unlock(pipe);
+   preempt_disable();
+   for (;;) {
+   if (pipe->pipe_wait_flag > wait_flag) {
+   preempt_enable();
+   pipe_lock(pipe);
+   return;
+   }
+   if (pipe_busy_loop_timeout(pipe, start_time))
+   break;
+   cpu_relax();
+   }
+   preempt_enable();

You are not really serious about busy looping with preemption disabled?

That's just wrong. Why do you want to block others from getting on the CPU
if there is nothing in the pipe?

There is no point in doing so, really. If the wait loop is preempted
because there is more important work to do, then it will come back and
either see new data, or leave due to wait time reached.

Makes sense. I will remove it.

Thanks,
Subhra


Thanks,

tglx





Re: [RFC PATCH 2/2] pipe: use pipe busy wait

2018-09-04 Thread Thomas Gleixner
On Thu, 30 Aug 2018, subhra mazumdar wrote:
>  
> +void pipe_busy_wait(struct pipe_inode_info *pipe)
> +{
> + unsigned long wait_flag = pipe->pipe_wait_flag;
> + unsigned long start_time = pipe_busy_loop_current_time();
> +
> + pipe_unlock(pipe);
> + preempt_disable();
> + for (;;) {
> + if (pipe->pipe_wait_flag > wait_flag) {
> + preempt_enable();
> + pipe_lock(pipe);
> + return;
> + }
> + if (pipe_busy_loop_timeout(pipe, start_time))
> + break;
> + cpu_relax();
> + }
> + preempt_enable();

You are not really serious about busy looping with preemption disabled?

That's just wrong. Why do you want to block others from getting on the CPU
if there is nothing in the pipe?

There is no point in doing so, really. If the wait loop is preempted
because there is more important work to do, then it will come back and
either see new data, or leave due to wait time reached.

Thanks,

tglx



[RFC PATCH 2/2] pipe: use pipe busy wait

2018-08-30 Thread subhra mazumdar
Enable busy waiting for pipes. pipe_busy_wait is called if pipe is empty or
full which spins for specified micro seconds. wake_up_busy_poll is called
when data is written or read to signal any busy waiting threads. A tunable
pipe_busy_poll is introduced to enable or disable busy waiting via /proc.
The value of it specifies the amount of spin in microseconds.

Signed-off-by: subhra mazumdar 
---
 fs/pipe.c | 58 +--
 include/linux/pipe_fs_i.h |  1 +
 kernel/sysctl.c   |  7 ++
 3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 97e5be8..03ce76a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -44,6 +44,7 @@ unsigned int pipe_min_size = PAGE_SIZE;
  */
 unsigned long pipe_user_pages_hard;
 unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+unsigned int pipe_busy_poll;
 
 /*
  * We use a start+len construction, which provides full use of the 
@@ -122,6 +123,35 @@ void pipe_wait(struct pipe_inode_info *pipe)
pipe_lock(pipe);
 }
 
+void pipe_busy_wait(struct pipe_inode_info *pipe)
+{
+   unsigned long wait_flag = pipe->pipe_wait_flag;
+   unsigned long start_time = pipe_busy_loop_current_time();
+
+   pipe_unlock(pipe);
+   preempt_disable();
+   for (;;) {
+   if (pipe->pipe_wait_flag > wait_flag) {
+   preempt_enable();
+   pipe_lock(pipe);
+   return;
+   }
+   if (pipe_busy_loop_timeout(pipe, start_time))
+   break;
+   cpu_relax();
+   }
+   preempt_enable();
+   pipe_lock(pipe);
+   if (pipe->pipe_wait_flag > wait_flag)
+   return;
+   pipe_wait(pipe);
+}
+
+void wake_up_busy_poll(struct pipe_inode_info *pipe)
+{
+   pipe->pipe_wait_flag++;
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
  struct pipe_buffer *buf)
 {
@@ -254,6 +284,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
struct pipe_inode_info *pipe = filp->private_data;
int do_wakeup;
ssize_t ret;
+   unsigned int poll = pipe->pipe_ll_usec;
 
/* Null read succeeds. */
if (unlikely(total_len == 0))
@@ -331,11 +362,18 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
break;
}
if (do_wakeup) {
+   if (poll)
+   wake_up_busy_poll(pipe);
wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | 
POLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
-   pipe_wait(pipe);
+   if (poll)
+   pipe_busy_wait(pipe);
+   else
+   pipe_wait(pipe);
}
+   if (poll && do_wakeup)
+   wake_up_busy_poll(pipe);
__pipe_unlock(pipe);
 
/* Signal writers asynchronously that there is more room. */
@@ -362,6 +400,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
int do_wakeup = 0;
size_t total_len = iov_iter_count(from);
ssize_t chars;
+   unsigned int poll = pipe->pipe_ll_usec;
 
/* Null write succeeds. */
if (unlikely(total_len == 0))
@@ -467,15 +506,22 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
break;
}
if (do_wakeup) {
+   if (poll)
+   wake_up_busy_poll(pipe);
wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | 
POLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
do_wakeup = 0;
}
pipe->waiting_writers++;
-   pipe_wait(pipe);
+   if (poll)
+   pipe_busy_wait(pipe);
+   else
+   pipe_wait(pipe);
pipe->waiting_writers--;
}
 out:
+   if (poll && do_wakeup)
+   wake_up_busy_poll(pipe);
__pipe_unlock(pipe);
if (do_wakeup) {
wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | 
POLLRDNORM);
@@ -564,6 +610,7 @@ static int
 pipe_release(struct inode *inode, struct file *file)
 {
struct pipe_inode_info *pipe = file->private_data;
+   unsigned int poll = pipe->pipe_ll_usec;
 
__pipe_lock(pipe);
if (file->f_mode & FMODE_READ)
@@ -572,6 +619,8 @@ pipe_release(struct inode *inode, struct file *file)
pipe->writers--;
 
if (pipe->readers || pipe->writers) {
+   if (poll)
+   wake_up_busy_poll(pipe);
wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | 
POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
kill_fasync(&pipe->fasyn