Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-14 Thread Peter Zijlstra
On Wed, 2006-09-13 at 15:50 -0500, Mike Christie wrote:
 Peter Zijlstra wrote:
  Implement sht-swapdev() for iSCSI. This method takes care of reserving
  the extra memory needed and marking all relevant sockets with SOCK_VMIO.
  
  When used for swapping, TCP socket creation is done under GFP_MEMALLOC and
  the TCP connect is done with SOCK_VMIO to ensure their success. Also the
  netlink userspace interface is marked SOCK_VMIO, this will ensure that even
  under pressure we can still communicate with the daemon (which runs as
  mlockall() and needs no additional memory to operate).
  
  Netlink requests are handled under the new PF_MEM_NOWAIT when a swapper is
  present. This ensures that the netlink socket will not block. User-space 
  will
  need to retry failed requests.
  
  The TCP receive path is handled under PF_MEMALLOC for SOCK_VMIO sockets.
  This makes sure we do not block the critical socket, and that we do not
  fail to process incomming data.
  
  Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
  CC: Mike Christie [EMAIL PROTECTED]
  ---
   drivers/scsi/iscsi_tcp.c|  103 
  +++-
   drivers/scsi/scsi_transport_iscsi.c |   23 +++-
   include/scsi/libiscsi.h |1 
   include/scsi/scsi_transport_iscsi.h |2 
   4 files changed, 113 insertions(+), 16 deletions(-)
  
  Index: linux-2.6/drivers/scsi/iscsi_tcp.c
  ===
  --- linux-2.6.orig/drivers/scsi/iscsi_tcp.c
  +++ linux-2.6/drivers/scsi/iscsi_tcp.c
  @@ -42,6 +42,7 @@
   #include scsi/scsi_host.h
   #include scsi/scsi.h
   #include scsi/scsi_transport_iscsi.h
  +#include scsi/scsi_device.h
   
   #include iscsi_tcp.h
   
  @@ -845,9 +846,13 @@ iscsi_tcp_data_recv(read_descriptor_t *r
  int rc;
  struct iscsi_conn *conn = rd_desc-arg.data;
  struct iscsi_tcp_conn *tcp_conn = conn-dd_data;
  -   int processed;
  +   int processed = 0;
  char pad[ISCSI_PAD_LEN];
  struct scatterlist sg;
  +   unsigned long pflags = current-flags;
  +
  +   if (sk_has_vmio(tcp_conn-sock-sk))
  +   current-flags |= PF_MEMALLOC;
   
 
 Is this too late or not needed or what is it for? This function gets run
 from the network layer's softirq and at this point we have a skbuff with
 data that we want to process. The iscsi layer also does not allocate
 memory for read or write IO in this path.

I thought I found allocations in that path, lemme search...
found this:

iscsi_tcp_data_recv()
  iscsi_data_rescv()
iscsi_complete_pdu()
  __iscsi_complete_pdu()
iscsi_recv_pdu()
  alloc_skb( GFP_ATOMIC);

 I think we would want to set this flag at a lower level. Something
 closer to where the skbuf is allocated?

Is that the skbuff you were talking about? If so, I'd need to carve a
path to pass the swapper information. I had that in a previous patch,
but that was large and ugly. I had to go carrying gfp_t flags all
through that call chain.

I could try again if you prefer that.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-14 Thread Mike Christie
Peter Zijlstra wrote:
 On Wed, 2006-09-13 at 15:50 -0500, Mike Christie wrote:
 Peter Zijlstra wrote:
 Implement sht-swapdev() for iSCSI. This method takes care of reserving
 the extra memory needed and marking all relevant sockets with SOCK_VMIO.

 When used for swapping, TCP socket creation is done under GFP_MEMALLOC and
 the TCP connect is done with SOCK_VMIO to ensure their success. Also the
 netlink userspace interface is marked SOCK_VMIO, this will ensure that even
 under pressure we can still communicate with the daemon (which runs as
 mlockall() and needs no additional memory to operate).

 Netlink requests are handled under the new PF_MEM_NOWAIT when a swapper is
 present. This ensures that the netlink socket will not block. User-space 
 will
 need to retry failed requests.

 The TCP receive path is handled under PF_MEMALLOC for SOCK_VMIO sockets.
 This makes sure we do not block the critical socket, and that we do not
 fail to process incomming data.

 Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
 CC: Mike Christie [EMAIL PROTECTED]
 ---
  drivers/scsi/iscsi_tcp.c|  103 
 +++-
  drivers/scsi/scsi_transport_iscsi.c |   23 +++-
  include/scsi/libiscsi.h |1 
  include/scsi/scsi_transport_iscsi.h |2 
  4 files changed, 113 insertions(+), 16 deletions(-)

 Index: linux-2.6/drivers/scsi/iscsi_tcp.c
 ===
 --- linux-2.6.orig/drivers/scsi/iscsi_tcp.c
 +++ linux-2.6/drivers/scsi/iscsi_tcp.c
 @@ -42,6 +42,7 @@
  #include scsi/scsi_host.h
  #include scsi/scsi.h
  #include scsi/scsi_transport_iscsi.h
 +#include scsi/scsi_device.h
  
  #include iscsi_tcp.h
  
 @@ -845,9 +846,13 @@ iscsi_tcp_data_recv(read_descriptor_t *r
 int rc;
 struct iscsi_conn *conn = rd_desc-arg.data;
 struct iscsi_tcp_conn *tcp_conn = conn-dd_data;
 -   int processed;
 +   int processed = 0;
 char pad[ISCSI_PAD_LEN];
 struct scatterlist sg;
 +   unsigned long pflags = current-flags;
 +
 +   if (sk_has_vmio(tcp_conn-sock-sk))
 +   current-flags |= PF_MEMALLOC;
  
 Is this too late or not needed or what is it for? This function gets run
 from the network layer's softirq and at this point we have a skbuff with
 data that we want to process. The iscsi layer also does not allocate
 memory for read or write IO in this path.
 
 I thought I found allocations in that path, lemme search...
 found this:
 
 iscsi_tcp_data_recv()
   iscsi_data_rescv()
 iscsi_complete_pdu()
   __iscsi_complete_pdu()
 iscsi_recv_pdu()
   alloc_skb( GFP_ATOMIC);
 

You are right that is for the netlink interface. Could we move the
PF_MEMALLOC setting and clearing to iscsi_recv_pdu and and add it to
iscsi_conn_error in scsi_transport_iscsi.c so that iscsi_iser and
qla4xxx will have it set when they need it. I will send a patch for this
along with a way to have the netlink sock vmio set for all iscsi drivers
that need it.


 I think we would want to set this flag at a lower level. Something
 closer to where the skbuf is allocated?
 
 Is that the skbuff you were talking about? If so, I'd need to carve a
 path to pass the swapper information. I had that in a previous patch,
 but that was large and ugly. I had to go carrying gfp_t flags all
 through that call chain.
 

In my original post I was just concerned about the sk_buff that gets
passed to the iscsi layer in iscsi_tcp_data_recv. I was wondering if the
chunk of code in the network layer or network driver that allocated that
skbuff needed to set PF_MEMALLOC.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-14 Thread Peter Zijlstra
On Thu, 2006-09-14 at 14:22 -0500, Mike Christie wrote:
 Peter Zijlstra wrote:
  On Wed, 2006-09-13 at 15:50 -0500, Mike Christie wrote:
  Peter Zijlstra wrote:
  Implement sht-swapdev() for iSCSI. This method takes care of reserving
  the extra memory needed and marking all relevant sockets with SOCK_VMIO.
 
  When used for swapping, TCP socket creation is done under GFP_MEMALLOC and
  the TCP connect is done with SOCK_VMIO to ensure their success. Also the
  netlink userspace interface is marked SOCK_VMIO, this will ensure that 
  even
  under pressure we can still communicate with the daemon (which runs as
  mlockall() and needs no additional memory to operate).
 
  Netlink requests are handled under the new PF_MEM_NOWAIT when a swapper is
  present. This ensures that the netlink socket will not block. User-space 
  will
  need to retry failed requests.
 
  The TCP receive path is handled under PF_MEMALLOC for SOCK_VMIO sockets.
  This makes sure we do not block the critical socket, and that we do not
  fail to process incomming data.
 
  Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
  CC: Mike Christie [EMAIL PROTECTED]
  ---
   drivers/scsi/iscsi_tcp.c|  103 
  +++-
   drivers/scsi/scsi_transport_iscsi.c |   23 +++-
   include/scsi/libiscsi.h |1 
   include/scsi/scsi_transport_iscsi.h |2 
   4 files changed, 113 insertions(+), 16 deletions(-)
 
  Index: linux-2.6/drivers/scsi/iscsi_tcp.c
  ===
  --- linux-2.6.orig/drivers/scsi/iscsi_tcp.c
  +++ linux-2.6/drivers/scsi/iscsi_tcp.c
  @@ -42,6 +42,7 @@
   #include scsi/scsi_host.h
   #include scsi/scsi.h
   #include scsi/scsi_transport_iscsi.h
  +#include scsi/scsi_device.h
   
   #include iscsi_tcp.h
   
  @@ -845,9 +846,13 @@ iscsi_tcp_data_recv(read_descriptor_t *r
int rc;
struct iscsi_conn *conn = rd_desc-arg.data;
struct iscsi_tcp_conn *tcp_conn = conn-dd_data;
  - int processed;
  + int processed = 0;
char pad[ISCSI_PAD_LEN];
struct scatterlist sg;
  + unsigned long pflags = current-flags;
  +
  + if (sk_has_vmio(tcp_conn-sock-sk))
  + current-flags |= PF_MEMALLOC;
   
  Is this too late or not needed or what is it for? This function gets run
  from the network layer's softirq and at this point we have a skbuff with
  data that we want to process. The iscsi layer also does not allocate
  memory for read or write IO in this path.
  
  I thought I found allocations in that path, lemme search...
  found this:
  
  iscsi_tcp_data_recv()
iscsi_data_rescv()
  iscsi_complete_pdu()
__iscsi_complete_pdu()
  iscsi_recv_pdu()
alloc_skb( GFP_ATOMIC);
  
 
 You are right that is for the netlink interface. Could we move the
 PF_MEMALLOC setting and clearing to iscsi_recv_pdu and and add it to
 iscsi_conn_error in scsi_transport_iscsi.c so that iscsi_iser and
 qla4xxx will have it set when they need it. I will send a patch for this
 along with a way to have the netlink sock vmio set for all iscsi drivers
 that need it.

I already have such a patch, look at:
http://programming.kicks-ass.net/kernel-patches/vm_deadlock/current/iscsi_vmio.patch

but what conditional do you want to use for PF_MEMALLOC, an
unconditional setting will be highly unpopular.

Hmm, perhaps you could key it of sk_has_vmio(nls)...

  I think we would want to set this flag at a lower level. Something
  closer to where the skbuf is allocated?
  
  Is that the skbuff you were talking about? If so, I'd need to carve a
  path to pass the swapper information. I had that in a previous patch,
  but that was large and ugly. I had to go carrying gfp_t flags all
  through that call chain.
  
 
 In my original post I was just concerned about the sk_buff that gets
 passed to the iscsi layer in iscsi_tcp_data_recv. I was wondering if the
 chunk of code in the network layer or network driver that allocated that
 skbuff needed to set PF_MEMALLOC.

(yeah I got that)

No, that got allocated because its a receive skb and !sk_vmio_socks(),
and got passed up because sk_has_vmio(iscsi_tcp_conn-sock-sk).


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-14 Thread Peter Zijlstra
On Thu, 2006-09-14 at 22:35 +0200, Peter Zijlstra wrote:
 On Thu, 2006-09-14 at 14:22 -0500, Mike Christie wrote:

   I thought I found allocations in that path, lemme search...
   found this:
   
   iscsi_tcp_data_recv()
 iscsi_data_rescv()
   iscsi_complete_pdu()
 __iscsi_complete_pdu()
   iscsi_recv_pdu()
 alloc_skb( GFP_ATOMIC);
   
  
  You are right that is for the netlink interface. Could we move the
  PF_MEMALLOC setting and clearing to iscsi_recv_pdu and and add it to
  iscsi_conn_error in scsi_transport_iscsi.c so that iscsi_iser and
  qla4xxx will have it set when they need it. I will send a patch for this
  along with a way to have the netlink sock vmio set for all iscsi drivers
  that need it.
 
 I already have such a patch, look at:
 http://programming.kicks-ass.net/kernel-patches/vm_deadlock/current/iscsi_vmio.patch
 
 but what conditional do you want to use for PF_MEMALLOC, an
 unconditional setting will be highly unpopular.
 
 Hmm, perhaps you could key it of sk_has_vmio(nls)...

On second thought, not such a good idea, that will still be too course.
You only want to force feed stuff originating from
sk_has_vmio(iscsi_tcp_conn-sock-sk) connections, not all
connectections as soon as there is a swapper in the system.

In order to preserve that information you need extra state, abusing this
process flags is as good as propagating __GFP_EMERGENCY down the call
chain with extra gfp_t arguments, perhaps even better, since it will
make sure we catch all allocations.



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-14 Thread Mike Christie
Peter Zijlstra wrote:
 On Thu, 2006-09-14 at 14:22 -0500, Mike Christie wrote:
 Peter Zijlstra wrote:
 On Wed, 2006-09-13 at 15:50 -0500, Mike Christie wrote:
 Peter Zijlstra wrote:
 Implement sht-swapdev() for iSCSI. This method takes care of reserving
 the extra memory needed and marking all relevant sockets with SOCK_VMIO.

 When used for swapping, TCP socket creation is done under GFP_MEMALLOC and
 the TCP connect is done with SOCK_VMIO to ensure their success. Also the
 netlink userspace interface is marked SOCK_VMIO, this will ensure that 
 even
 under pressure we can still communicate with the daemon (which runs as
 mlockall() and needs no additional memory to operate).

 Netlink requests are handled under the new PF_MEM_NOWAIT when a swapper is
 present. This ensures that the netlink socket will not block. User-space 
 will
 need to retry failed requests.

 The TCP receive path is handled under PF_MEMALLOC for SOCK_VMIO sockets.
 This makes sure we do not block the critical socket, and that we do not
 fail to process incomming data.

 Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
 CC: Mike Christie [EMAIL PROTECTED]
 ---
  drivers/scsi/iscsi_tcp.c|  103 
 +++-
  drivers/scsi/scsi_transport_iscsi.c |   23 +++-
  include/scsi/libiscsi.h |1 
  include/scsi/scsi_transport_iscsi.h |2 
  4 files changed, 113 insertions(+), 16 deletions(-)

 Index: linux-2.6/drivers/scsi/iscsi_tcp.c
 ===
 --- linux-2.6.orig/drivers/scsi/iscsi_tcp.c
 +++ linux-2.6/drivers/scsi/iscsi_tcp.c
 @@ -42,6 +42,7 @@
  #include scsi/scsi_host.h
  #include scsi/scsi.h
  #include scsi/scsi_transport_iscsi.h
 +#include scsi/scsi_device.h
  
  #include iscsi_tcp.h
  
 @@ -845,9 +846,13 @@ iscsi_tcp_data_recv(read_descriptor_t *r
   int rc;
   struct iscsi_conn *conn = rd_desc-arg.data;
   struct iscsi_tcp_conn *tcp_conn = conn-dd_data;
 - int processed;
 + int processed = 0;
   char pad[ISCSI_PAD_LEN];
   struct scatterlist sg;
 + unsigned long pflags = current-flags;
 +
 + if (sk_has_vmio(tcp_conn-sock-sk))
 + current-flags |= PF_MEMALLOC;
  
 Is this too late or not needed or what is it for? This function gets run
 from the network layer's softirq and at this point we have a skbuff with
 data that we want to process. The iscsi layer also does not allocate
 memory for read or write IO in this path.
 I thought I found allocations in that path, lemme search...
 found this:

 iscsi_tcp_data_recv()
   iscsi_data_rescv()
 iscsi_complete_pdu()
   __iscsi_complete_pdu()
 iscsi_recv_pdu()
   alloc_skb( GFP_ATOMIC);

 You are right that is for the netlink interface. Could we move the
 PF_MEMALLOC setting and clearing to iscsi_recv_pdu and and add it to
 iscsi_conn_error in scsi_transport_iscsi.c so that iscsi_iser and
 qla4xxx will have it set when they need it. I will send a patch for this
 along with a way to have the netlink sock vmio set for all iscsi drivers
 that need it.
 
 I already have such a patch, look at:
 http://programming.kicks-ass.net/kernel-patches/vm_deadlock/current/iscsi_vmio.patch
 

You are drowning me in patches :) I did not see that one. I was still
commenting on this patch :)

The new patch looks ok.


 but what conditional do you want to use for PF_MEMALLOC, an
 unconditional setting will be highly unpopular.
 
 Hmm, perhaps you could key it of sk_has_vmio(nls)...

Yes.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-14 Thread Mike Christie
Mike Christie wrote:
 Peter Zijlstra wrote:
 On Thu, 2006-09-14 at 14:22 -0500, Mike Christie wrote:
 Peter Zijlstra wrote:
 On Wed, 2006-09-13 at 15:50 -0500, Mike Christie wrote:
 Peter Zijlstra wrote:
 Implement sht-swapdev() for iSCSI. This method takes care of reserving
 the extra memory needed and marking all relevant sockets with SOCK_VMIO.

 When used for swapping, TCP socket creation is done under GFP_MEMALLOC 
 and
 the TCP connect is done with SOCK_VMIO to ensure their success. Also the
 netlink userspace interface is marked SOCK_VMIO, this will ensure that 
 even
 under pressure we can still communicate with the daemon (which runs as
 mlockall() and needs no additional memory to operate).

 Netlink requests are handled under the new PF_MEM_NOWAIT when a swapper 
 is
 present. This ensures that the netlink socket will not block. User-space 
 will
 need to retry failed requests.

 The TCP receive path is handled under PF_MEMALLOC for SOCK_VMIO sockets.
 This makes sure we do not block the critical socket, and that we do not
 fail to process incomming data.

 Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
 CC: Mike Christie [EMAIL PROTECTED]
 ---
  drivers/scsi/iscsi_tcp.c|  103 
 +++-
  drivers/scsi/scsi_transport_iscsi.c |   23 +++-
  include/scsi/libiscsi.h |1 
  include/scsi/scsi_transport_iscsi.h |2 
  4 files changed, 113 insertions(+), 16 deletions(-)

 Index: linux-2.6/drivers/scsi/iscsi_tcp.c
 ===
 --- linux-2.6.orig/drivers/scsi/iscsi_tcp.c
 +++ linux-2.6/drivers/scsi/iscsi_tcp.c
 @@ -42,6 +42,7 @@
  #include scsi/scsi_host.h
  #include scsi/scsi.h
  #include scsi/scsi_transport_iscsi.h
 +#include scsi/scsi_device.h
  
  #include iscsi_tcp.h
  
 @@ -845,9 +846,13 @@ iscsi_tcp_data_recv(read_descriptor_t *r
  int rc;
  struct iscsi_conn *conn = rd_desc-arg.data;
  struct iscsi_tcp_conn *tcp_conn = conn-dd_data;
 -int processed;
 +int processed = 0;
  char pad[ISCSI_PAD_LEN];
  struct scatterlist sg;
 +unsigned long pflags = current-flags;
 +
 +if (sk_has_vmio(tcp_conn-sock-sk))
 +current-flags |= PF_MEMALLOC;
  
 Is this too late or not needed or what is it for? This function gets run
 from the network layer's softirq and at this point we have a skbuff with
 data that we want to process. The iscsi layer also does not allocate
 memory for read or write IO in this path.
 I thought I found allocations in that path, lemme search...
 found this:

 iscsi_tcp_data_recv()
   iscsi_data_rescv()
 iscsi_complete_pdu()
   __iscsi_complete_pdu()
 iscsi_recv_pdu()
   alloc_skb( GFP_ATOMIC);

 You are right that is for the netlink interface. Could we move the
 PF_MEMALLOC setting and clearing to iscsi_recv_pdu and and add it to
 iscsi_conn_error in scsi_transport_iscsi.c so that iscsi_iser and
 qla4xxx will have it set when they need it. I will send a patch for this
 along with a way to have the netlink sock vmio set for all iscsi drivers
 that need it.
 I already have such a patch, look at:
 http://programming.kicks-ass.net/kernel-patches/vm_deadlock/current/iscsi_vmio.patch

 
 You are drowning me in patches :) I did not see that one. I was still
 commenting on this patch :)
 
 The new patch looks ok.
 

Oh, I think you need a sock_put to go with netlink lookup (lookup does a
hold).
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-14 Thread Mike Christie
Peter Zijlstra wrote:
 On Thu, 2006-09-14 at 22:35 +0200, Peter Zijlstra wrote:
 On Thu, 2006-09-14 at 14:22 -0500, Mike Christie wrote:
 
 I thought I found allocations in that path, lemme search...
 found this:

 iscsi_tcp_data_recv()
   iscsi_data_rescv()
 iscsi_complete_pdu()
   __iscsi_complete_pdu()
 iscsi_recv_pdu()
   alloc_skb( GFP_ATOMIC);

 You are right that is for the netlink interface. Could we move the
 PF_MEMALLOC setting and clearing to iscsi_recv_pdu and and add it to
 iscsi_conn_error in scsi_transport_iscsi.c so that iscsi_iser and
 qla4xxx will have it set when they need it. I will send a patch for this
 along with a way to have the netlink sock vmio set for all iscsi drivers
 that need it.
 I already have such a patch, look at:
 http://programming.kicks-ass.net/kernel-patches/vm_deadlock/current/iscsi_vmio.patch

 but what conditional do you want to use for PF_MEMALLOC, an
 unconditional setting will be highly unpopular.

 Hmm, perhaps you could key it of sk_has_vmio(nls)...
 
 On second thought, not such a good idea, that will still be too course.
 You only want to force feed stuff originating from
 sk_has_vmio(iscsi_tcp_conn-sock-sk) connections, not all
 connectections as soon as there is a swapper in the system.
 

You can move the iscsi_session-swapper field to the iscsi_cls_session
and have iscsi_swapdev take a iscsi_cls_session and set that flag.
iscsi_recv_pdu and iscsi_conn_error and all the llds can then access
this bit.

 In order to preserve that information you need extra state, abusing this
 process flags is as good as propagating __GFP_EMERGENCY down the call
 chain with extra gfp_t arguments, perhaps even better, since it will
 make sure we catch all allocations.
 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-14 Thread Peter Zijlstra
On Thu, 2006-09-14 at 16:03 -0500, Mike Christie wrote:
 Mike Christie wrote:
  Peter Zijlstra wrote:
  On Thu, 2006-09-14 at 14:22 -0500, Mike Christie wrote:
  Peter Zijlstra wrote:
  On Wed, 2006-09-13 at 15:50 -0500, Mike Christie wrote:
  Peter Zijlstra wrote:
  Implement sht-swapdev() for iSCSI. This method takes care of reserving
  the extra memory needed and marking all relevant sockets with 
  SOCK_VMIO.
 
  When used for swapping, TCP socket creation is done under GFP_MEMALLOC 
  and
  the TCP connect is done with SOCK_VMIO to ensure their success. Also 
  the
  netlink userspace interface is marked SOCK_VMIO, this will ensure that 
  even
  under pressure we can still communicate with the daemon (which runs as
  mlockall() and needs no additional memory to operate).
 
  Netlink requests are handled under the new PF_MEM_NOWAIT when a 
  swapper is
  present. This ensures that the netlink socket will not block. 
  User-space will
  need to retry failed requests.
 
  The TCP receive path is handled under PF_MEMALLOC for SOCK_VMIO 
  sockets.
  This makes sure we do not block the critical socket, and that we do not
  fail to process incomming data.
 
  Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
  CC: Mike Christie [EMAIL PROTECTED]
  ---
   drivers/scsi/iscsi_tcp.c|  103 
  +++-
   drivers/scsi/scsi_transport_iscsi.c |   23 +++-
   include/scsi/libiscsi.h |1 
   include/scsi/scsi_transport_iscsi.h |2 
   4 files changed, 113 insertions(+), 16 deletions(-)
 
  Index: linux-2.6/drivers/scsi/iscsi_tcp.c
  ===
  --- linux-2.6.orig/drivers/scsi/iscsi_tcp.c
  +++ linux-2.6/drivers/scsi/iscsi_tcp.c
  @@ -42,6 +42,7 @@
   #include scsi/scsi_host.h
   #include scsi/scsi.h
   #include scsi/scsi_transport_iscsi.h
  +#include scsi/scsi_device.h
   
   #include iscsi_tcp.h
   
  @@ -845,9 +846,13 @@ iscsi_tcp_data_recv(read_descriptor_t *r
 int rc;
 struct iscsi_conn *conn = rd_desc-arg.data;
 struct iscsi_tcp_conn *tcp_conn = conn-dd_data;
  -  int processed;
  +  int processed = 0;
 char pad[ISCSI_PAD_LEN];
 struct scatterlist sg;
  +  unsigned long pflags = current-flags;
  +
  +  if (sk_has_vmio(tcp_conn-sock-sk))
  +  current-flags |= PF_MEMALLOC;
   
  Is this too late or not needed or what is it for? This function gets run
  from the network layer's softirq and at this point we have a skbuff with
  data that we want to process. The iscsi layer also does not allocate
  memory for read or write IO in this path.
  I thought I found allocations in that path, lemme search...
  found this:
 
  iscsi_tcp_data_recv()
iscsi_data_rescv()
  iscsi_complete_pdu()
__iscsi_complete_pdu()
  iscsi_recv_pdu()
alloc_skb( GFP_ATOMIC);
 
  You are right that is for the netlink interface. Could we move the
  PF_MEMALLOC setting and clearing to iscsi_recv_pdu and and add it to
  iscsi_conn_error in scsi_transport_iscsi.c so that iscsi_iser and
  qla4xxx will have it set when they need it. I will send a patch for this
  along with a way to have the netlink sock vmio set for all iscsi drivers
  that need it.
  I already have such a patch, look at:
  http://programming.kicks-ass.net/kernel-patches/vm_deadlock/current/iscsi_vmio.patch
 
  
  You are drowning me in patches :) I did not see that one. I was still
  commenting on this patch :)
  
  The new patch looks ok.
  
 
 Oh, I think you need a sock_put to go with netlink lookup (lookup does a
 hold).

D'0h again, I'd forgotten I'd used it there too.
hit refresh :-)


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-14 Thread Mike Christie
Mike Christie wrote:
 Peter Zijlstra wrote:
 On Thu, 2006-09-14 at 22:35 +0200, Peter Zijlstra wrote:
 On Thu, 2006-09-14 at 14:22 -0500, Mike Christie wrote:
 I thought I found allocations in that path, lemme search...
 found this:

 iscsi_tcp_data_recv()
   iscsi_data_rescv()
 iscsi_complete_pdu()
   __iscsi_complete_pdu()
 iscsi_recv_pdu()
   alloc_skb( GFP_ATOMIC);

 You are right that is for the netlink interface. Could we move the
 PF_MEMALLOC setting and clearing to iscsi_recv_pdu and and add it to
 iscsi_conn_error in scsi_transport_iscsi.c so that iscsi_iser and
 qla4xxx will have it set when they need it. I will send a patch for this
 along with a way to have the netlink sock vmio set for all iscsi drivers
 that need it.
 I already have such a patch, look at:
 http://programming.kicks-ass.net/kernel-patches/vm_deadlock/current/iscsi_vmio.patch

 but what conditional do you want to use for PF_MEMALLOC, an
 unconditional setting will be highly unpopular.

 Hmm, perhaps you could key it of sk_has_vmio(nls)...
 On second thought, not such a good idea, that will still be too course.
 You only want to force feed stuff originating from
 sk_has_vmio(iscsi_tcp_conn-sock-sk) connections, not all
 connectections as soon as there is a swapper in the system.

 
 You can move the iscsi_session-swapper field to the iscsi_cls_session
 and have iscsi_swapdev take a iscsi_cls_session and set that flag.
 iscsi_recv_pdu and iscsi_conn_error and all the llds can then access
 this bit.
 
 In order to preserve that information you need extra state, abusing this
 process flags is as good as propagating __GFP_EMERGENCY down the call
 chain with extra gfp_t arguments, perhaps even better, since it will
 make sure we catch all allocations.


Oh yeah, on the send side we also allocate some memory for the netlink
interface if there is a connection error (iscsi_conn_failure -
iscsi_conn_error). And when that is called from the transmit side we can
change the GFP_ATOMICs to GFP_NOIOs since we have process context.

So I am just saying we need to set that flag in a couple more places (if
you set it in iscsi_conn_error if iscsi_cls_session-swapper is set then
don't worry about it), and that I need to change iscsi_conn_failure and
iscsi_conn_error to take a gfp_t as an argument (or do a in_interrupt or
something) so we can use GFP_NOIO in the transmit code.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-13 Thread Mike Christie
Peter Zijlstra wrote:
 Implement sht-swapdev() for iSCSI. This method takes care of reserving
 the extra memory needed and marking all relevant sockets with SOCK_VMIO.
 
 When used for swapping, TCP socket creation is done under GFP_MEMALLOC and
 the TCP connect is done with SOCK_VMIO to ensure their success. Also the
 netlink userspace interface is marked SOCK_VMIO, this will ensure that even
 under pressure we can still communicate with the daemon (which runs as
 mlockall() and needs no additional memory to operate).
 
 Netlink requests are handled under the new PF_MEM_NOWAIT when a swapper is
 present. This ensures that the netlink socket will not block. User-space will
 need to retry failed requests.
 
 The TCP receive path is handled under PF_MEMALLOC for SOCK_VMIO sockets.
 This makes sure we do not block the critical socket, and that we do not
 fail to process incomming data.
 
 Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
 CC: Mike Christie [EMAIL PROTECTED]
 ---
  drivers/scsi/iscsi_tcp.c|  103 
 +++-
  drivers/scsi/scsi_transport_iscsi.c |   23 +++-
  include/scsi/libiscsi.h |1 
  include/scsi/scsi_transport_iscsi.h |2 
  4 files changed, 113 insertions(+), 16 deletions(-)
 
 Index: linux-2.6/drivers/scsi/iscsi_tcp.c
 ===
 --- linux-2.6.orig/drivers/scsi/iscsi_tcp.c
 +++ linux-2.6/drivers/scsi/iscsi_tcp.c
 @@ -42,6 +42,7 @@
  #include scsi/scsi_host.h
  #include scsi/scsi.h
  #include scsi/scsi_transport_iscsi.h
 +#include scsi/scsi_device.h
  
  #include iscsi_tcp.h
  
 @@ -845,9 +846,13 @@ iscsi_tcp_data_recv(read_descriptor_t *r
   int rc;
   struct iscsi_conn *conn = rd_desc-arg.data;
   struct iscsi_tcp_conn *tcp_conn = conn-dd_data;
 - int processed;
 + int processed = 0;
   char pad[ISCSI_PAD_LEN];
   struct scatterlist sg;
 + unsigned long pflags = current-flags;
 +
 + if (sk_has_vmio(tcp_conn-sock-sk))
 + current-flags |= PF_MEMALLOC;
  

Is this too late or not needed or what is it for? This function gets run
from the network layer's softirq and at this point we have a skbuff with
data that we want to process. The iscsi layer also does not allocate
memory for read or write IO in this path.

I think we would want to set this flag at a lower level. Something
closer to where the skbuf is allocated?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 20/20] iscsi: support for swapping over iSCSI.

2006-09-12 Thread Peter Zijlstra
Implement sht-swapdev() for iSCSI. This method takes care of reserving
the extra memory needed and marking all relevant sockets with SOCK_VMIO.

When used for swapping, TCP socket creation is done under GFP_MEMALLOC and
the TCP connect is done with SOCK_VMIO to ensure their success. Also the
netlink userspace interface is marked SOCK_VMIO, this will ensure that even
under pressure we can still communicate with the daemon (which runs as
mlockall() and needs no additional memory to operate).

Netlink requests are handled under the new PF_MEM_NOWAIT when a swapper is
present. This ensures that the netlink socket will not block. User-space will
need to retry failed requests.

The TCP receive path is handled under PF_MEMALLOC for SOCK_VMIO sockets.
This makes sure we do not block the critical socket, and that we do not
fail to process incomming data.

Signed-off-by: Peter Zijlstra [EMAIL PROTECTED]
CC: Mike Christie [EMAIL PROTECTED]
---
 drivers/scsi/iscsi_tcp.c|  103 +++-
 drivers/scsi/scsi_transport_iscsi.c |   23 +++-
 include/scsi/libiscsi.h |1 
 include/scsi/scsi_transport_iscsi.h |2 
 4 files changed, 113 insertions(+), 16 deletions(-)

Index: linux-2.6/drivers/scsi/iscsi_tcp.c
===
--- linux-2.6.orig/drivers/scsi/iscsi_tcp.c
+++ linux-2.6/drivers/scsi/iscsi_tcp.c
@@ -42,6 +42,7 @@
 #include scsi/scsi_host.h
 #include scsi/scsi.h
 #include scsi/scsi_transport_iscsi.h
+#include scsi/scsi_device.h
 
 #include iscsi_tcp.h
 
@@ -845,9 +846,13 @@ iscsi_tcp_data_recv(read_descriptor_t *r
int rc;
struct iscsi_conn *conn = rd_desc-arg.data;
struct iscsi_tcp_conn *tcp_conn = conn-dd_data;
-   int processed;
+   int processed = 0;
char pad[ISCSI_PAD_LEN];
struct scatterlist sg;
+   unsigned long pflags = current-flags;
+
+   if (sk_has_vmio(tcp_conn-sock-sk))
+   current-flags |= PF_MEMALLOC;
 
/*
 * Save current SKB and its offset in the corresponding
@@ -866,7 +871,7 @@ more:
 
if (unlikely(conn-suspend_rx)) {
debug_tcp(conn %d Rx suspended!\n, conn-id);
-   return 0;
+   goto out;
}
 
if (tcp_conn-in_progress == IN_PROGRESS_WAIT_HEADER ||
@@ -877,7 +882,7 @@ more:
goto nomore;
   else {
iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-   return 0;
+   goto out;
   }
}
 
@@ -891,7 +896,7 @@ more:
tcp_conn-in_progress = IN_PROGRESS_DATA_RECV;
} else if (rc) {
iscsi_conn_failure(conn, rc);
-   return 0;
+   goto out;
}
}
 
@@ -905,7 +910,7 @@ more:
if (rc == -EAGAIN)
goto again;
iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-   return 0;
+   goto out;
}
 
memcpy(recv_digest, conn-data, sizeof(uint32_t));
@@ -914,7 +919,7 @@ more:
  0x%x != 0x%x\n, recv_digest,
  tcp_conn-in.datadgst);
iscsi_conn_failure(conn, ISCSI_ERR_DATA_DGST);
-   return 0;
+   goto out;
} else {
debug_tcp(iscsi_tcp: data digest match!
  0x%x == 0x%x\n, recv_digest,
@@ -934,7 +939,7 @@ more:
if (rc == -EAGAIN)
goto again;
iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-   return 0;
+   goto out;
}
tcp_conn-in.copy -= tcp_conn-in.padding;
tcp_conn-in.offset += tcp_conn-in.padding;
@@ -969,6 +974,8 @@ more:
 nomore:
processed = tcp_conn-in.offset - offset;
BUG_ON(processed == 0);
+out:
+   current-flags = pflags;
return processed;
 
 again:
@@ -979,7 +986,7 @@ again:
BUG_ON(processed  len);
 
conn-rxdata_octets += processed;
-   return processed;
+   goto out;
 }
 
 static void
@@ -1735,14 +1742,26 @@ iscsi_tcp_ep_connect(struct iscsi_cls_se
 {
struct socket *sock;
int rc, size, arg = 1, window = 524288;
+   int swapper = 0;
+   unsigned long pflags = current-flags;
+
+   if (cls_session) {
+   struct iscsi_session *session;
+   session = class_to_transport_session(cls_session);
+   swapper = session-swapper;
+   }
+
+   if (swapper)
+   pflags |= PF_MEMALLOC;
 
rc = sock_create_kern(dst_addr-sa_family, SOCK_STREAM,