[PATCH 4/6] [IPV6]: Reorganize strut ipv6_mc_socklist to save 8 bytes

2008-01-31 Thread Arnaldo Carvalho de Melo
/home/acme/git/net-2.6/net/dccp/ipv6.c:
  struct ipv6_mc_socklist |   -8
 1 struct changed

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/net/if_inet6.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index b0b882e..b2509d4 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -82,9 +82,9 @@ struct ipv6_mc_socklist
 {
struct in6_addr addr;
int ifindex;
+   unsigned intsfmode; /* MCAST_{INCLUDE,EXCLUDE} */
struct ipv6_mc_socklist *next;
rwlock_tsflock;
-   unsigned intsfmode; /* MCAST_{INCLUDE,EXCLUDE} */
struct ip6_sf_socklist  *sflist;
 };
 
-- 
1.5.3.8

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/6] [INET6]: Reorganize struct inet6_dev to save 8 bytes

2008-01-31 Thread Arnaldo Carvalho de Melo
And make it a multiple of a 64 bytes, reducing cacheline trashing:

Before:

[EMAIL PROTECTED] net-2.6]$ pahole -C inet6_dev net/dccp/ipv6.o
struct inet6_dev {
SNIP
long unsigned int  mc_maxdelay;  /*48 8 */
unsigned char  mc_qrv;   /*56 1 */
unsigned char  mc_gq_running;/*57 1 */
unsigned char  mc_ifc_count; /*58 1 */

/* XXX 5 bytes hole, try to pack */

/* --- cacheline 1 boundary (64 bytes) --- */
struct timer_list  mc_gq_timer;  /*6448 */
SNIP
__u32  if_flags; /*   180 4 */
intdead; /*   184 4 */
u8 rndid[8]; /*   188 8 */

/* XXX 4 bytes hole, try to pack */

/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
struct timer_list  regen_timer;  /*   20048 */

SNIP

/* size: 456, cachelines: 8 */
/* sum members: 447, holes: 2, sum holes: 9 */
/* last cacheline: 8 bytes */
};

After:

[EMAIL PROTECTED] net-2.6]$ codiff net/dccp/ipv6.o.old net/dccp/ipv6.o
/home/acme/git/net-2.6/net/dccp/ipv6.c:
  struct inet6_dev |   -8
   1 struct changed

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/net/if_inet6.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index b24508a..b0b882e 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -171,6 +171,7 @@ struct inet6_dev
unsigned char   mc_qrv;
unsigned char   mc_gq_running;
unsigned char   mc_ifc_count;
+   int dead;
struct timer_list   mc_gq_timer;/* general query timer */
struct timer_list   mc_ifc_timer;   /* interface change timer */
 
@@ -178,7 +179,6 @@ struct inet6_dev
rwlock_tlock;
atomic_trefcnt;
__u32   if_flags;
-   int dead;
 
 #ifdef CONFIG_IPV6_PRIVACY
u8  rndid[8];
-- 
1.5.3.8

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/6] [TCP]: Reorganize struct tcp_sock to save 16 bytes on 64-bit arch

2008-01-31 Thread Arnaldo Carvalho de Melo
/home/acme/git/net-2.6/net/ipv6/tcp_ipv6.c:
  struct tcp_sock  |  -16
  struct tcp6_sock |  -16
 2 structs changed

Now it is at:

/* size: 1552, cachelines: 25 */
/* paddings: 2, sum paddings: 8 */
/* last cacheline: 16 bytes */

As soon as we stop using skb_queue_list we'll get it down to 24 cachelines.

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/linux/tcp.h |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 08027f1..f48644d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -304,7 +304,6 @@ struct tcp_sock {
u32 rtt_seq;/* sequence number to update rttvar */
 
u32 packets_out;/* Packets which are in flight*/
-   u32 retrans_out;/* Retransmitted packets out*/
 /*
  *  Options received (usually on last packet, some only on SYN packets).
  */
@@ -332,6 +331,8 @@ struct tcp_sock {
 
struct tcp_sack_block recv_sack_cache[4];
 
+   u32 retrans_out;/* Retransmitted packets out*/
+
struct sk_buff *highest_sack;   /* highest skb with SACK received
 * (validity guaranteed only if
 * sacked_out  0)
@@ -372,7 +373,6 @@ struct tcp_sock {
 
unsigned intkeepalive_time;   /* time before keep alive 
takes place */
unsigned intkeepalive_intvl;  /* time interval between keep 
alive probes */
-   int linger2;
 
unsigned long last_synq_overflow; 
 
@@ -398,6 +398,8 @@ struct tcp_sock {
u32   probe_seq_end;
} mtu_probe;
 
+   int linger2;
+
 #ifdef CONFIG_TCP_MD5SIG
 /* TCP AF-Specific parts; only used by MD5 Signature support so far */
struct tcp_sock_af_ops  *af_specific;
-- 
1.5.3.8

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/6] [SOCK] proto: Add hashinfo member to struct proto

2008-01-31 Thread Arnaldo Carvalho de Melo
This way we can remove TCP and DCCP specific versions of

sk-sk_prot-get_port: both v4 and v6 use inet_csk_get_port
sk-sk_prot-hash: inet_hash is directly used, only v6 need
   a specific version to deal with mapped sockets
sk-sk_prot-unhash:   both v4 and v6 use inet_hash directly

struct inet_connection_sock_af_ops also gets a new member, bind_conflict,
so that inet_csk_get_port can find the per family routine.

Now only the lookup routines receive as a parameter a struct inet_hashtable.

With this we further reuse code, reducing the difference among INET transport
protocols.

Eventually work has to be done on UDP and SCTP to make them share this
infrastructure and get as a bonus inet_diag interfaces so that iproute can be
used with these protocols.

vmlinux:

add/remove: 2/4 grow/shrink: 13/6 up/down: 539/-508 (31)
function old new   delta
inet_hash  - 237+237
inet_unhash- 190+190
inet_csk_get_port558 573 +15
tcp_sendmsg 27682777  +9
unix_proto   344 352  +8
udplite_prot 344 352  +8
udp_prot 344 352  +8
tcp_prot 344 352  +8
raw_prot 344 352  +8
packet_proto 344 352  +8
netlink_proto344 352  +8
ipv4_specific 96 104  +8
inet_put_port137 145  +8
__inet_hash_nolisten 310 318  +8
__inet6_hash 352 360  +8
tcp_v4_syn_recv_sock 666 664  -2
superblock_doinit587 582  -5
tcp_v4_destroy_sock  557 550  -7
tcp_set_state420 413  -7
__inet_hash_connect  673 665  -8
__inet_lookup_listener   333 313 -20
tcp_v4_get_port   31   - -31
tcp_v4_hash   48   - -48
tcp_unhash   179   --179
__inet_hash  201   --201

ipv6.ko:

add/remove: 0/1 grow/shrink: 6/2 up/down: 48/-42 (6)
function old new   delta
udpv6_prot   344 352  +8
udplitev6_prot   344 352  +8
tcpv6_prot   344 352  +8
rawv6_prot   344 352  +8
ipv6_specific 96 104  +8
ipv6_mapped   96 104  +8
tcp_v6_syn_recv_sock16331629  -4
tcp_v6_hash   70  63  -7
tcp_v6_get_port   31   - -31

dccp.ko:

add/remove: 0/2 grow/shrink: 1/4 up/down: 2/-243 (-241)
function old new   delta
dccp_feat_change_recv   10501052  +2
init_module  961 960  -1
dccp_init961 960  -1
dccp_set_state   474 467  -7
dccp_destroy_sock191 184  -7
dccp_hash 48   - -48
dccp_unhash  179   --179

dccp_ipv4.ko:

add/remove: 0/1 grow/shrink: 2/1 up/down: 16/-33 (-17)
function old new   delta
dccp_v4_prot 344 352  +8
dccp_ipv4_af_ops  96 104  +8
dccp_v4_request_recv_sock522 520  -2
dccp_v4_get_port  31   - -31

dccp_ipv6.ko:

add/remove: 0/1 grow/shrink: 4/1 up/down: 29/-38 (-9)
function old new   delta
dccp_v6_prot 344 352  +8
dccp_ipv6_mapped  96 104  +8
dccp_ipv6_af_ops  96 104  +8
dccp_v6_request_recv_sock   15141519  +5
dccp_v6_hash  68  61  -7
dccp_v6_get_port  31   - -31

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/net/inet6_hashtables.h |2 +-
 include/net/inet_connection_sock.h |8 ++---
 include/net/inet_hashtables.h  |   51

[PATCH 3/6] [DCCP]: Reorganize struct dccp_sock to save 8 bytes

2008-01-31 Thread Arnaldo Carvalho de Melo
/home/acme/git/net-2.6/net/dccp/ipv6.c:
  struct dccp_sock  |   -8
  struct dccp6_sock |   -8
 2 structs changed

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/linux/dccp.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 484e45c..aa07370 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -525,6 +525,7 @@ struct dccp_sock {
__u64   dccps_gsr;
__u64   dccps_gar;
__be32  dccps_service;
+   __u32   dccps_mss_cache;
struct dccp_service_list*dccps_service_list;
__u32   dccps_timestamp_echo;
__u32   dccps_timestamp_time;
@@ -533,7 +534,6 @@ struct dccp_sock {
__u16   dccps_pcslen;
__u16   dccps_pcrlen;
unsigned long   dccps_ndp_count;
-   __u32   dccps_mss_cache;
unsigned long   dccps_rate_last;
struct dccp_minisockdccps_minisock;
struct dccp_ackvec  *dccps_hc_rx_ackvec;
-- 
1.5.3.8

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHES 0/6]: Move hashinfo to sk_prot and struct reorgs

2008-01-31 Thread Arnaldo Carvalho de Melo
Hi David,

Please consider pulling from:

master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6

Best Regards,

- Arnaldo

 include/linux/dccp.h   |2 -
 include/linux/tcp.h|6 ++-
 include/net/if_inet6.h |4 +-
 include/net/inet6_hashtables.h |2 -
 include/net/inet_connection_sock.h |8 +
 include/net/inet_hashtables.h  |   51 +---
 include/net/inet_timewait_sock.h   |3 -
 include/net/sock.h |3 +
 net/dccp/dccp.h|2 -
 net/dccp/ipv4.c|   18 ---
 net/dccp/ipv6.c|   20 +---
 net/dccp/proto.c   |   18 +--
 net/ipv4/inet_connection_sock.c|8 +
 net/ipv4/inet_hashtables.c |   58 ++---
 net/ipv4/tcp.c |2 -
 net/ipv4/tcp_ipv4.c|   31 +--
 net/ipv6/inet6_hashtables.c|4 +-
 net/ipv6/tcp_ipv6.c|   19 +---
 18 files changed, 111 insertions(+), 148 deletions(-)
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/6] [INET_TIMEWAIT_SOCK]: Reorganize struct inet_timewait_sock to save some bytes

2008-01-31 Thread Arnaldo Carvalho de Melo
/home/acme/git/net-2.6/net/ipv6/tcp_ipv6.c:
  struct inet_timewait_sock |   -8
  struct tcp_timewait_sock  |   -8
 2 structs changed
  tcp_v6_rcv|   -6
 1 function changed, 6 bytes removed, diff: -6

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/net/inet_timewait_sock.h |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 67e9250..faead52 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -116,8 +116,8 @@ struct inet_timewait_sock {
 #define tw_hash__tw_common.skc_hash
 #define tw_prot__tw_common.skc_prot
 #define tw_net __tw_common.skc_net
+   int tw_timeout;
volatile unsigned char  tw_substate;
-   /* 3 bits hole, try to pack */
unsigned char   tw_rcv_wscale;
/* Socket demultiplex comparisons on incoming packets. */
/* these five are in inet_sock */
@@ -130,7 +130,6 @@ struct inet_timewait_sock {
__u8tw_ipv6only:1;
/* 15 bits hole, try to pack */
__u16   tw_ipv6_offset;
-   int tw_timeout;
unsigned long   tw_ttd;
struct inet_bind_bucket *tw_tb;
struct hlist_node   tw_death_node;
-- 
1.5.3.8

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/6] [TCP]: Reorganize struct tcp_sock to save 16 bytes on 64-bit arch

2008-01-31 Thread Arnaldo Carvalho de Melo
Em Thu, Jan 31, 2008 at 08:57:53PM +0100, Eric Dumazet escreveu:
 Arnaldo Carvalho de Melo a écrit :
 /home/acme/git/net-2.6/net/ipv6/tcp_ipv6.c:
   struct tcp_sock  |  -16
   struct tcp6_sock |  -16
  2 structs changed

 Now it is at:

 /* size: 1552, cachelines: 25 */
 /* paddings: 2, sum paddings: 8 */
 /* last cacheline: 16 bytes */

 As soon as we stop using skb_queue_list we'll get it down to 24 cachelines.

 Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
 ---
  include/linux/tcp.h |6 --
  1 files changed, 4 insertions(+), 2 deletions(-)

 diff --git a/include/linux/tcp.h b/include/linux/tcp.h
 index 08027f1..f48644d 100644
 --- a/include/linux/tcp.h
 +++ b/include/linux/tcp.h
 @@ -304,7 +304,6 @@ struct tcp_sock {
  u32 rtt_seq;/* sequence number to update rttvar */
  u32 packets_out;/* Packets which are in flight*/
 -u32 retrans_out;/* Retransmitted packets out*/
  /*
   *  Options received (usually on last packet, some only on SYN packets).
   */
 @@ -332,6 +331,8 @@ struct tcp_sock {
  struct tcp_sack_block recv_sack_cache[4];
  +   u32 retrans_out;/* Retransmitted packets out*/
 +

 Hum... retrans_out should sit close to packets_out (or lost_out/sacked_out 
 ???), please.

 'struct tcp_sock' is very large on 64 bits, so I would prefer to make sure 
 most paths dont need to touch all 24 cache lines (or 25 cache lines).

That is perfectly fine, I'll replace my patch with another, that states
this beyond doubt.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: FW: ccid2/ccid3 oopses

2008-01-09 Thread Arnaldo Carvalho de Melo
Em Wed, Jan 09, 2008 at 12:28:27PM +, Gerrit Renker escreveu:
 Roland, -
 
  apparently, i got crashes when loading/unloading other driver modules just
  after ccid2 or ccid3 had been loaded/unloaded _once_ (have not used them at
  all, just modprobe module;modprobe -r module) 
  
 snip
  the easiest way to reproduce is:
  
  while true;do modprobe dccp_ccid2/3;modprobe -r dccp_ccid2/3;done
  after short time, the kernel oopses (messages below)
  
  i`m not sure if this is worth to be filed at kernel bugzilla, so i`m 
  contacting
  you personally first.
 
 The issue is known: once loaded, the DCCP modules can not be unloaded
 without causing a crash as the one you have observed. This is due to the
 fact that dccp_ipv{4,6} use control sockets which need to be released
 before the module can be unloaded.
 When the control sockets are not released then crashes will always
 result.
 In earlier versions of DCCP there was a kernel option known as unload hack,
 which conditionally inserted 
   sock_release(dccp_v{4,6}_ctl_socket);
 in 
   dccp_v{4,6}_exit()
 
 However, as the name says, it is a hack since there are other issues to 
 be considered:
   * sockets in timewait state
   * other wait states (e.g. half-open connections)
   * memory which has not been released
   * module dependencies
 
 With regard to the latter, I am normally using the Unload Hack and
 release modules in the following order:
 
   dccp_probe = dccp_ccid2 = dccp_ccid3 = dccp_tfrc_lib =
 dccp_ipv6  = dccp_ipv4  = dccp_diag  = dccp
 
 Long story short
  * the CCID/DCCP modules can currently not safely be unloaded
  * maybe we should disable module unloading for the mainline kernel
  * if anyone is interested to use the unload hack, here is the old patch
http://www.erg.abdn.ac.uk/users/gerrit/dccp/testing_dccp/Unload_Hack.diff

Gerrit, the control socket isn't attached to any CCID module, so the
CCID modules should be safe to remove, and IIRC they were safe to
unload.

The unload hack was for something else, for the core DCCP modules. We
can't unload because there are refcounts held by the control sock, so
the unload hack would just destroy the control sock and thus the module
refcount would reach zero and it could then be unloaded.

I've been consistently being sidetracked with work (huh :-)) and
couldn't look at this issue, but the CCID modules should be safe to
unload.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: dccp send

2008-01-03 Thread Arnaldo Carvalho de Melo
Em Fri, Jan 04, 2008 at 11:25:29AM +1300, Ian McDonald escreveu:
 On Jan 4, 2008 11:19 AM, Tomasz Grobelny [EMAIL PROTECTED] wrote:
  I think I almost got it. Thanks a lot for the detailed explanation. But I've
  got two more questions:
  1. How can I control the amount of memory allowed to be allocated for the
  socket send buffer? It it somehow connected with tx_qlen or is it not?
 
 I wrote the code here so let me comment - there is no fixed amount of
 memory allocated - it is determined by the tx_qlen. I was looking at

there is no memory allocated in advance nor explicit, DCCP specific
mechanism for limiting the amount of memory a socket can use for its
write queue, but DCCP uses sock_alloc_send_skb and that function does
memory accounting and checks if the maximum memory (SO_SNDBUF) has been
reached, when it makes the app to sleep if O_NONBLOCK is not set.

 implementing memory limits but it got too hard and as DCCP doesn't
 combine packets I thought it made it much simpler just to put a limit
 by packets.
 
  2. If I decide that for whatever reason a given skb already in 
  sk_write_queue
  should not be transmitted I should remove it from queue and call kfree_skb 
  on
  it, it that right?
 
 Correct. Remember of course all your locking mechanisms. If you want a
 good example look at the below towards the end:
 http://wand.net.nz/~iam4/dccp/patches20/30-best_packet_next.diff
 
 (NB this patch applies against 2.6.20 after previous patches in series).

Correct if he is in-kernel.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: dccp send

2008-01-02 Thread Arnaldo Carvalho de Melo
Em Wed, Jan 02, 2008 at 11:34:25PM +0100, Tomasz Grobelny escreveu:
 Dnia Wednesday 02 of January 2008, Arnaldo Carvalho de Melo napisał:
  Em Wed, Jan 02, 2008 at 01:41:16AM +0100, Tomasz Grobelny escreveu:
   When I use dccp does sendmsg function block (until it sends the packet)?
   If so, should it? In either case, how to make it just queue the packet
   and return?
 
  The interface is the same as for other AF_INET transports, use
  O_NONBLOCK (open, fcntl) if you want it to be non blocking.
 
 But what are the semantics? In TCP if sendmsg blocks the application has no 
 choice but to wait. The time for which sendmsg blocks it tightly connected 

In DCCP its exactly the same semantic, sendmsg may block waiting for
send buffer space. If you don't want that use O_NONBLOCK and it will
return right away with EAGAIN.

 with speed at which packets can be transmitted over the network. Is it the 
 case for dccp as well? If so, does it mean that provided stable network 
 conditions over longer period of time and using blocking version of sendmsg 
 in a loop very few packets should be lost? (By very few I mean  10%)

If the application sends many packets in rapid succession before the
DCCP core gets permission from the CCID in use to send those packets to
the network what will happen is that sk_write_queue will have packets
waiting, using the memory allowed to be allocated for the socket send
buffer, which will make sock_alloc_send_skb wait for the packets in
sk_write_queue to be sent, freeing up send buffer space, when it will be
possible for sock_alloc_send_skb to actually allocate memory for the
packet and return to dccp_sendmsg, that in turn will return to the
application after putting the newly allocated packet in sk_write_queue,
doing the send buffer accounting and trying to send something in
dccp_write_xmit, where it will ask the CCID if it can send some more
packets, got it?

Look at these sequences:

  sock_alloc_send_skb - skb_set_owner_w

  This will set the skb destructor to be sock_wfree

Now look at what happens when the NIC driver finally frees the sk_buff
(the network packet):

  kfree_skb - __kfree_skb - skb_release_all - skb-destructor 
(sock_wfree)

sock_wfree, in turn, will call sk-sk_write_space, that for DCCP is
dccp_write_space (see inet_create - sk-sk_prot-init
(dccp_v4_init_sock) - dccp_init_sock), that will wake up any tasks that
are sleeping in sk_sleep, that is... the process doing the dccp_sendmsg
- sock_alloc_send_skb.

All this behaviour can be controlled thru O_NONBLOCK _and_
setsockopt(SO_SNDTIMEO), just like TCP.
 
  It queues it in the write routine and tries to send it right away, but
  doesn't waits for actually sending the packet, i.e. it only checks if
  there is write space available, if you set O_NONBLOCK and there is no
  space it returns ENOBUFS, if O_NONBLOCK is not set it will sleep waiting
  for write space to be made available, when the process will be awaken.

 dccp_sendmsg seems to block on call to sock_alloc_send_skb. Which at first 
 look does not seems to send anything anywhere. So why does it block at all? 
 Can't it just allocate the data needed and return?

See above.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: panic on 2.6.24rc5

2008-01-01 Thread Arnaldo Carvalho de Melo
Em Sun, Dec 30, 2007 at 04:18:36PM +0100, Tomasz Grobelny escreveu:
 On Friday 28 December 2007, I wrote:
  Dnia Wednesday 26 of December 2007, napisałeś:
   What are the panics you are getting? It might be worth posting them to
   the list.
 
  Here is the screenshot I captured a few days ago. Details:
   - kernel-vanilla 2.6.24rc5,
 Now I'm using kernel as described in Arnaldo's mail (davem/net-2.6.25 + 
 patches 0001 to 0051).

dccp_hdlr_ack_ratio is not on net-2.6.25, which means it is in one of
the 0001 to 0051 patches from Gerrit. So, to help us understand where is
the problem you could try building a kernel without applying any of the
0001 to 0051 patches.

Could you do this at and report the results?

I'm also assuming you are using CCID2 either by explicitely using
feature negotiation setsockopt calls or by using the default, that is
CCID2. If this is the case it would also be interesting to, before
rebuilding the kernel, to try using CCID3 as the problem you're
experiencing when using netem is exactly in the interface between the
core DCCP code and the CCID being used.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: panic on 2.6.24rc5

2008-01-01 Thread Arnaldo Carvalho de Melo
Em Tue, Jan 01, 2008 at 10:30:56PM +0100, Tomasz Grobelny escreveu:
 Dnia Tuesday 01 of January 2008, Arnaldo Carvalho de Melo napisał:
  Em Sun, Dec 30, 2007 at 04:18:36PM +0100, Tomasz Grobelny escreveu:
   On Friday 28 December 2007, I wrote:
Dnia Wednesday 26 of December 2007, napisałeś:
 What are the panics you are getting? It might be worth posting them
 to the list.
   
Here is the screenshot I captured a few days ago. Details:
 - kernel-vanilla 2.6.24rc5,
  
   Now I'm using kernel as described in Arnaldo's mail (davem/net-2.6.25 +
   patches 0001 to 0051).
 
  dccp_hdlr_ack_ratio is not on net-2.6.25, which means it is in one of
  the 0001 to 0051 patches from Gerrit. So, to help us understand where is
  the problem you could try building a kernel without applying any of the
  0001 to 0051 patches.
 
  Could you do this at and report the results?
 
 But what should I exactly test? Just whether the delays are gone or something 
 more? I'll try to when I have some time (hopefully during weekend).

If the kernel oopses, if the results are the same or are some problem
introduced in the patches by Gerrit. I.e. you would help us to narrow
down the problem by trying a binary search of changeset history built
kernels. 

Please take a look at Documentation/BUG-HUNTING in the kernel sources.
The process is somehow time consuming and its understandable if you
can't perform it, your reports are already of great help, but if you can
try helping us to narrow down exactly when some bugs you notice
appeared, or if they were always present after some kernel builds, we'd
be really grateful :-)
 
  I'm also assuming you are using CCID2 either by explicitely using
  feature negotiation setsockopt calls or by using the default, that is

 In fact I was using ccid3. When I switched to ccid2 it started to work more 
 or 
 less ok. It seems that for whatever reason ccid_hc_tx_send_packet is 
 returning too big values (up to 64000).

That is an excellent data point, ccid3 code is way more complex than
ccid2, so trying with both is always a valuable data point.
 
  CCID2. If this is the case it would also be interesting to, before
  rebuilding the kernel, to try using CCID3 as the problem you're
  experiencing when using netem is exactly in the interface between the
  core DCCP code and the CCID being used.

 The problem with netem exists with both ccid2 and ccid3. I suspect that when 
 all three elements of the connection (server, client and netem) are on one 
 host netem is able to communicate packet loss by returning error. If netem 
 was on a diffrent host the packet would be sent correctly (no BUG: err=1 
 after ccid_hc_tx_packet_sent) but dropped on another host. I think that in 
 this situation dccp should behave as if the packet was simply dropped.

I can't work on this right now, will look at it tomorrow, but thanks for
the data points!

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: dccp send

2008-01-01 Thread Arnaldo Carvalho de Melo
Em Wed, Jan 02, 2008 at 01:41:16AM +0100, Tomasz Grobelny escreveu:
 When I use dccp does sendmsg function block (until it sends the packet)? If 
 so, should it? In either case, how to make it just queue the packet and 
 return?

The interface is the same as for other AF_INET transports, use
O_NONBLOCK (open, fcntl) if you want it to be non blocking.

It queues it in the write routine and tries to send it right away, but
doesn't waits for actually sending the packet, i.e. it only checks if
there is write space available, if you set O_NONBLOCK and there is no
space it returns ENOBUFS, if O_NONBLOCK is not set it will sleep waiting
for write space to be made available, when the process will be awaken.

Use setsockopt(SO_SNDTIMEO) to change the default send timeout, etc.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: panic on 2.6.24rc5

2008-01-01 Thread Arnaldo Carvalho de Melo
Em Wed, Jan 02, 2008 at 01:57:14AM +0100, Tomasz Grobelny escreveu:
 Dnia Wednesday 02 of January 2008, Arnaldo Carvalho de Melo napisał:
  If the kernel oopses, if the results are the same or are some problem
  introduced in the patches by Gerrit. I.e. you would help us to narrow
  down the problem by trying a binary search of changeset history built
  kernels.

 Oh, and by the way: does there exist any set of automated tests for dccp? It 
 would be nice to have one, wouldn't it? Otherwise accepting any patch is 
 quite risky...

There are test programs, documented in the wiki, and there is peer
review too :-)

And DCCP on Linux was written in such a way that a large part of its
core engine is actually shared with TCP, benefiting from a much bigger
set of developers and testers.

But please feel free to add more automated tests, it'll benefit us all.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: mirror?

2007-12-26 Thread Arnaldo Carvalho de Melo
Em Tue, Dec 25, 2007 at 08:23:05AM +1300, Ian McDonald escreveu:
 On 12/25/07, Tomasz Grobelny [EMAIL PROTECTED] wrote:
  As http://www.erg.abdn.ac.uk/ seems to be down at least since yesterday I'd
  like to ask whether any mirror of dccp git tree and/or dccp patches to
  mainline kernel is available. TIA,
  --
  Regards,
  Tomasz Grobelny
 
 Here is the announcement of the outage:
 
 Both the dccp and the ccid4 trees have been brought up-to-date and uploaded.
 
 Due to necessary building work, we have to power down almost all servers in
 the department from
 
Friday  21st December 2007until
Sunday   6th January  2008

I have a clone and just made the whole series available as a single
patch at:

http://oops.ghostprotocols.net:81/acme/dccp/gerrit-2.6/gerrit-pending-review.patch.bz2

The broken out patch series is also at:

http://oops.ghostprotocols.net:81/acme/dccp/gerrit-2.6/

You have to clone from DaveM's net-2.6.25 and then apply the series, for
your convenience:

git clone git://.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.25
cd net-2.6.25
wget 
http://oops.ghostprotocols.net:81/acme/dccp/gerrit-2.6/gerrit-pending-review.patch.bz2
bzcat gerrit-pending-review.patch.bz2 | patch -p1

I plan to review the feature negotiation patch series that is the bulk
of what I still haven't reviewed in Gerrit's pending tree while its
still 2007.

Regards,

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [DCCP] [PATCH 1/1]: Whitespace / outdated documentation

2007-12-18 Thread Arnaldo Carvalho de Melo
Em Tue, Dec 18, 2007 at 12:41:08PM +, Gerrit Renker escreveu:
 [CCID3]: Whitespace cleanups and outdated documentation
 
 This removes outdated documentation which had been forgotten to be
 removed (x_recv, rtt now appear twice, p was removed from rx_sock);
 and removes new whitespace.
 
 Signed-off-by: Gerrit Renker [EMAIL PROTECTED]

Thanks, I'll get my scripts fixed... :-\

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC]: Break up a patch in two (rfc3448bis changes to feedback reception)

2007-12-17 Thread Arnaldo Carvalho de Melo
Hi Gerrit,

Please take a look at the two attached patches, they were made
from your patch [CCID3]: Implement rfc3448bis changes to feedback reception,
that has this changeset comment:

--
[CCID 3]: Implement rfc3448bis changes to feedback reception

This implements the algorithm to update the allowed sending rate X upon
receiving feedback packets, as described in draft rfc3448bis, 4.2/4.3.

The patch further removes two irrelevant states in TX feedback handling:

 * the NO_SENT state is only triggered in bidirectional mode,
   costing unnecessary processing.
 * the TERM (terminating) state is irrelevant.
--

The second part further removes, was made a separate patch
that I have applied before the rfc3448bis changes to feedback reception.
Doing it this way eases understanding the change as we don't mixup
identantion changes made due to removing the switch statement.

The end result should be equivalent, but please take a look and
let me know if the algorithm was changed in any way.

Thanks a lot,

- Arnaldo
From 9165240abd3d4e8280647b9372dc3f223a802347 Mon Sep 17 00:00:00 2001
From: Gerrit Renker [EMAIL PROTECTED]
Date: Mon, 17 Dec 2007 10:25:06 -0200
Subject: [PATCH 2/3] [CCID3]: Remove two irrelevant states in TX feedback 
handling

 * the NO_SENT state is only triggered in bidirectional mode,
   costing unnecessary processing.
 * the TERM (terminating) state is irrelevant.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c |  173 +++-
 1 files changed, 84 insertions(+), 89 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 90e0454..1b1cb74 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -402,112 +402,107 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, 
struct sk_buff *skb)
if (!(DCCP_SKB_CB(skb)-dccpd_type == DCCP_PKT_ACK ||
  DCCP_SKB_CB(skb)-dccpd_type == DCCP_PKT_DATAACK))
return;
+   /* ... and only in the established state */
+   if (hctx-ccid3hctx_state != TFRC_SSTATE_FBACK 
+   hctx-ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+   return;
 
opt_recv = hctx-ccid3hctx_options_received;
+   now = ktime_get_real();
 
-   switch (hctx-ccid3hctx_state) {
-   case TFRC_SSTATE_NO_FBACK:
-   case TFRC_SSTATE_FBACK:
-   now = ktime_get_real();
-
-   /* estimate RTT from history if ACK number is valid */
-   r_sample = tfrc_tx_hist_rtt(hctx-ccid3hctx_hist,
-   DCCP_SKB_CB(skb)-dccpd_ack_seq, 
now);
-   if (r_sample == 0) {
-   DCCP_WARN(%s(%p): %s with bogus ACK-%llu\n, 
dccp_role(sk), sk,
- 
dccp_packet_name(DCCP_SKB_CB(skb)-dccpd_type),
- (unsigned long 
long)DCCP_SKB_CB(skb)-dccpd_ack_seq);
-   return;
-   }
+   /* Estimate RTT from history if ACK number is valid */
+   r_sample = tfrc_tx_hist_rtt(hctx-ccid3hctx_hist,
+   DCCP_SKB_CB(skb)-dccpd_ack_seq, now);
+   if (r_sample == 0) {
+   DCCP_WARN(%s(%p): %s with bogus ACK-%llu\n, dccp_role(sk), sk,
+ dccp_packet_name(DCCP_SKB_CB(skb)-dccpd_type),
+ (unsigned long long)DCCP_SKB_CB(skb)-dccpd_ack_seq);
+   return;
+   }
+
+   /* Update receive rate in units of 64 * bytes/second */
+   hctx-ccid3hctx_x_recv = opt_recv-ccid3or_receive_rate;
+   hctx-ccid3hctx_x_recv = 6;
 
-   /* Update receive rate in units of 64 * bytes/second */
-   hctx-ccid3hctx_x_recv = opt_recv-ccid3or_receive_rate;
-   hctx-ccid3hctx_x_recv = 6;
+   /* Update loss event rate (which is scaled by 1e6) */
+   pinv = opt_recv-ccid3or_loss_event_rate;
+   if (pinv == ~0U || pinv == 0)  /* see RFC 4342, 8.5   */
+   hctx-ccid3hctx_p = 0;
+   else   /* can not exceed 100% */
+   hctx-ccid3hctx_p = 100 / pinv;
+   /*
+* Validate new RTT sample and update moving average
+*/
+   r_sample = dccp_sample_rtt(sk, r_sample);
+   hctx-ccid3hctx_rtt = tfrc_ewma(hctx-ccid3hctx_rtt, r_sample, 9);
 
-   /* Update loss event rate */
-   pinv = opt_recv-ccid3or_loss_event_rate;
-   if (pinv == ~0U || pinv == 0)  /* see RFC 4342, 8.5   */
-   hctx-ccid3hctx_p = 0;
-   else   /* can not exceed 100% */
-   hctx-ccid3hctx_p = 100 / pinv;
+   if (hctx-ccid3hctx_state == TFRC_SSTATE_NO_FBACK

[PATCHES 0/5]: DCCP patches for 2.6.25

2007-12-17 Thread Arnaldo Carvalho de Melo
Hi David,

Please consider pulling from:

master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25

Best Regards,

- Arnaldo

net/dccp/ccids/ccid3.c |  252 
+-
net/dccp/ccids/ccid3.h |8 -
net/dccp/dccp.h|6 -
3 files changed, 130 insertions(+), 136 deletions(-)
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/5] [CCID3]: Use a function to update p_inv, and p is never used

2007-12-17 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This patch
 1) concentrates previously scattered computation of p_inv into one function;
 2) removes the `p' element of the CCID3 RX sock (it is redundant);
 3) makes the tfrc_rx_info structure standalone, only used on demand.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c |   11 ---
 net/dccp/ccids/ccid3.h |8 
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 876c747..90e0454 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -917,6 +917,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const 
int optname, int len,
  u32 __user *optval, int __user *optlen)
 {
const struct ccid3_hc_rx_sock *hcrx;
+   struct tfrc_rx_info rx_info;
const void *val;
 
/* Listen socks doesn't have a private CCID block */
@@ -926,10 +927,14 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const 
int optname, int len,
hcrx = ccid3_hc_rx_sk(sk);
switch (optname) {
case DCCP_SOCKOPT_CCID_RX_INFO:
-   if (len  sizeof(hcrx-ccid3hcrx_tfrc))
+   if (len  sizeof(rx_info))
return -EINVAL;
-   len = sizeof(hcrx-ccid3hcrx_tfrc);
-   val = hcrx-ccid3hcrx_tfrc;
+   rx_info.tfrcrx_x_recv = hcrx-ccid3hcrx_x_recv;
+   rx_info.tfrcrx_rtt= hcrx-ccid3hcrx_rtt;
+   rx_info.tfrcrx_p  = hcrx-ccid3hcrx_pinv == 0 ? ~0U :
+  scaled_div(1, hcrx-ccid3hcrx_pinv);
+   len = sizeof(rx_info);
+   val = rx_info;
break;
default:
return -ENOPROTOOPT;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index e9f6ff4..49ca32b 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -139,6 +139,8 @@ enum ccid3_hc_rx_states {
  *  @ccid3hcrx_last_counter  -  Tracks window counter (RFC 4342, 8.1)
  *  @ccid3hcrx_state  -  Receiver state, one of %ccid3_hc_rx_states
  *  @ccid3hcrx_bytes_recv  -  Total sum of DCCP payload bytes
+ *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448, sec. 4.3)
+ *  @ccid3hcrx_rtt  -  Receiver estimate of RTT
  *  @ccid3hcrx_tstamp_last_feedback  -  Time at which last feedback was sent
  *  @ccid3hcrx_tstamp_last_ack  -  Time at which last feedback was sent
  *  @ccid3hcrx_hist  -  Packet history (loss detection + RTT sampling)
@@ -147,13 +149,11 @@ enum ccid3_hc_rx_states {
  *  @ccid3hcrx_pinv  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
  */
 struct ccid3_hc_rx_sock {
-   struct tfrc_rx_info ccid3hcrx_tfrc;
-#define ccid3hcrx_x_recv   ccid3hcrx_tfrc.tfrcrx_x_recv
-#define ccid3hcrx_rtt  ccid3hcrx_tfrc.tfrcrx_rtt
-#define ccid3hcrx_pccid3hcrx_tfrc.tfrcrx_p
u8  ccid3hcrx_last_counter:4;
enum ccid3_hc_rx_states ccid3hcrx_state:8;
u32 ccid3hcrx_bytes_recv;
+   u32 ccid3hcrx_x_recv;
+   u32 ccid3hcrx_rtt;
ktime_t ccid3hcrx_tstamp_last_feedback;
struct tfrc_rx_hist ccid3hcrx_hist;
struct tfrc_loss_hist   ccid3hcrx_li_hist;
-- 
1.5.3.6

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/5] [DCCP]: Remove unused inline function

2007-12-17 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

The function follows48(), which is a special-case of dccp_delta_seqno(),
is nowhere used in the DCCP code, thus removed by this patch.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/dccp.h |6 --
 1 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b138e20..ebe59d9 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -153,12 +153,6 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
return after48(seq1, seq2) ? seq1 : seq2;
 }
 
-/* is seq1 next seqno after seq2 */
-static inline int follows48(const u64 seq1, const u64 seq2)
-{
-   return dccp_delta_seqno(seq2, seq1) == 1;
-}
-
 enum {
DCCP_MIB_NUM = 0,
DCCP_MIB_ACTIVEOPENS,   /* ActiveOpens */
-- 
1.5.3.6

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/5] [CCID3]: Nofeedback timer according to rfc3448bis

2007-12-17 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This implements the changes to the nofeedback timer handling suggested
in draft rfc3448bis00, section 4.4. In particular, these changes mean:

 * better handling of the lossless case (p == 0)
 * the timestamp for computing t_ld becomes obsolete
 * much more recent document (RFC 3448 is almost 5 years old)
 * concepts in rfc3448bis arose from a real, working implementation
   (cf. sec. 12)

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c |   63 ++--
 1 files changed, 29 insertions(+), 34 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 7c8e9ad..00b5f11 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -131,12 +131,11 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock 
*hctx, ktime_t now)
  *
  */
 static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
-
 {
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
__u64 min_rate = 2 * hctx-ccid3hctx_x_recv;
const  __u64 old_x = hctx-ccid3hctx_x;
-   ktime_t now = stamp? *stamp : ktime_get_real();
+   ktime_t now = stamp ? *stamp : ktime_get_real();
 
/*
 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
@@ -230,27 +229,27 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long 
data)
ccid3_pr_debug(%s(%p, state=%s) - entry \n, dccp_role(sk), sk,
   ccid3_tx_state_name(hctx-ccid3hctx_state));
 
-   switch (hctx-ccid3hctx_state) {
-   case TFRC_SSTATE_NO_FBACK:
-   /* RFC 3448, 4.4: Halve send rate directly */
+   if (hctx-ccid3hctx_state == TFRC_SSTATE_FBACK)
+   ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+   else if (hctx-ccid3hctx_state != TFRC_SSTATE_NO_FBACK)
+   goto out;
+
+   /*
+* Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
+*/
+   if (hctx-ccid3hctx_t_rto == 0 ||   /* no feedback received yet */
+   hctx-ccid3hctx_p == 0) {
+
+   /* halve send rate directly */
hctx-ccid3hctx_x = max(hctx-ccid3hctx_x / 2,
(((__u64)hctx-ccid3hctx_s)  6) /
TFRC_T_MBI);
-
-   ccid3_pr_debug(%s(%p, state=%s), updated tx rate to %u 
-  bytes/s\n, dccp_role(sk), sk,
-  ccid3_tx_state_name(hctx-ccid3hctx_state),
-  (unsigned)(hctx-ccid3hctx_x  6));
-   /* The value of R is still undefined and so we can not recompute
-* the timout value. Keep initial value as per [RFC 4342, 5]. */
-   t_nfb = TFRC_INITIAL_TIMEOUT;
ccid3_update_send_interval(hctx);
-   break;
-   case TFRC_SSTATE_FBACK:
+   } else {
/*
-*  Modify the cached value of X_recv [RFC 3448, 4.4]
+*  Modify the cached value of X_recv
 *
-*  If (p == 0 || X_calc  2 * X_recv)
+*  If (X_calc  2 * X_recv)
 *X_recv = max(X_recv / 2, s / (2 * t_mbi));
 *  Else
 *X_recv = X_calc / 4;
@@ -259,32 +258,28 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long 
data)
 */
BUG_ON(hctx-ccid3hctx_p  !hctx-ccid3hctx_x_calc);
 
-   if (hctx-ccid3hctx_p == 0 ||
-   (hctx-ccid3hctx_x_calc  (hctx-ccid3hctx_x_recv  5))) {
-
+   if (hctx-ccid3hctx_x_calc  (hctx-ccid3hctx_x_recv  5))
hctx-ccid3hctx_x_recv =
max(hctx-ccid3hctx_x_recv / 2,
(((__u64)hctx-ccid3hctx_s)  6) /
  (2 * TFRC_T_MBI));
-   } else {
+   else {
hctx-ccid3hctx_x_recv = hctx-ccid3hctx_x_calc;
hctx-ccid3hctx_x_recv = 4;
}
-   /* Now recalculate X [RFC 3448, 4.3, step (4)] */
ccid3_hc_tx_update_x(sk, NULL);
-   /*
-* Schedule no feedback timer to expire in
-* max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
-* See comments in packet_recv() regarding the value of t_RTO.
-*/
-   t_nfb = max(hctx-ccid3hctx_t_rto, 2 * hctx-ccid3hctx_t_ipi);
-   break;
-   case TFRC_SSTATE_NO_SENT:
-   DCCP_BUG(%s(%p) - Illegal state NO_SENT, dccp_role(sk), sk);
-   /* fall through */
-   case TFRC_SSTATE_TERM:
-   goto out;
}
+   ccid3_pr_debug(Reduced X to %llu/64

Re: [PATCHES 0/3]: DCCP patches for 2.6.25

2007-12-14 Thread Arnaldo Carvalho de Melo
Em Fri, Dec 14, 2007 at 11:29:14AM -0800, David Miller escreveu:
 From: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
 Date: Thu, 13 Dec 2007 23:41:59 -0200
 
  Please consider pulling from:
  
  master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25
 
 Pulled, but could you please reformat Gerrit's changelog entries in
 the future?  They have these 80+ long lines which are painful to read
 in ascii email clients and in terminal output.
 
 I'll do this by hand during my next rebase for this case, but I will
 push back when I see it again in future pull requests.

OK, will take that into account in future requests,

Thanks a lot,

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/12] [DCCP]: Collapse repeated `len' statements into one

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This replaces 4 individual assignments for `len' with a single
one, placed where the control flow of those 4 leads to.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/proto.c |5 +
 1 files changed, 1 insertions(+), 4 deletions(-)

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index cc87c50..0bed4a6 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -657,19 +657,15 @@ static int do_dccp_getsockopt(struct sock *sk, int level, 
int optname,
   (__be32 __user *)optval, optlen);
case DCCP_SOCKOPT_GET_CUR_MPS:
val = dp-dccps_mss_cache;
-   len = sizeof(val);
break;
case DCCP_SOCKOPT_SERVER_TIMEWAIT:
val = dp-dccps_server_timewait;
-   len = sizeof(val);
break;
case DCCP_SOCKOPT_SEND_CSCOV:
val = dp-dccps_pcslen;
-   len = sizeof(val);
break;
case DCCP_SOCKOPT_RECV_CSCOV:
val = dp-dccps_pcrlen;
-   len = sizeof(val);
break;
case 128 ... 191:
return ccid_hc_rx_getsockopt(dp-dccps_hc_rx_ccid, sk, optname,
@@ -681,6 +677,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, 
int optname,
return -ENOPROTOOPT;
}
 
+   len = sizeof(val);
if (put_user(len, optlen) || copy_to_user(optval, val, len))
return -EFAULT;
 
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/12] [DCCP]: Shift the retransmit timer for active-close into output.c

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

When performing active close, RFC 4340, 8.3. requires to retransmit the
Close/CloseReq with a backoff-retransmit timer starting at intially 2 RTTs.

This patch shifts the existing code for active-close retransmit timer
into output.c, so that the retransmit timer is started when the first
Close/CloseReq is sent. Previously, the timer was started when, after
releasing the socket in dccp_close(), the actively-closing side had not yet
reached the CLOSED/TIMEWAIT state.

The patch further reduces the initial timeout from 3 seconds to the required
2 RTTs, where - in absence of a known RTT - the fallback value specified in
RFC 4340, 3.4 is used.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/output.c |   13 -
 net/dccp/proto.c  |   18 --
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/net/dccp/output.c b/net/dccp/output.c
index 7caa7f5..e97584a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -574,7 +574,18 @@ void dccp_send_close(struct sock *sk, const int active)
dccp_write_xmit(sk, 1);
dccp_skb_entail(sk, skb);
dccp_transmit_skb(sk, skb_clone(skb, prio));
-   /* FIXME do we need a retransmit timer here? */
+   /*
+* Retransmission timer for active-close: RFC 4340, 8.3 requires
+* to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
+* state can be left. The initial timeout is 2 RTTs.
+* Since RTT measurement is done by the CCIDs, there is no easy
+* way to get an RTT sample. The fallback RTT from RFC 4340, 3.4
+* is too low (200ms); we use a high value to avoid unnecessary
+* retransmissions when the link RTT is  0.2 seconds.
+* FIXME: Let main module sample RTTs and use that instead.
+*/
+   inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
} else
dccp_transmit_skb(sk, skb);
 }
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 60f40ec..8a73c8f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -996,24 +996,6 @@ adjudge_to_death:
if (state != DCCP_CLOSED  sk-sk_state == DCCP_CLOSED)
goto out;
 
-   /*
-* The last release_sock may have processed the CLOSE or RESET
-* packet moving sock to CLOSED state, if not we have to fire
-* the CLOSE/CLOSEREQ retransmission timer, see 8.3. Termination
-* in draft-ietf-dccp-spec-11. -acme
-*/
-   if (sk-sk_state == DCCP_CLOSING) {
-   /* FIXME: should start at 2 * RTT */
-   /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
-   inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- inet_csk(sk)-icsk_rto,
- DCCP_RTO_MAX);
-#if 0
-   /* Yeah, we should use sk-sk_prot-orphan_count, etc */
-   dccp_set_state(sk, DCCP_CLOSED);
-#endif
-   }
-
if (sk-sk_state == DCCP_CLOSED)
inet_csk_destroy_sock(sk);
 
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHES 0/12]: DCCP patches for 2.6.25

2007-12-13 Thread Arnaldo Carvalho de Melo
Hi David,

Please consider pulling from:

master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25

Best Regards,

- Arnaldo

 Documentation/networking/dccp.txt |6 +
 include/linux/dccp.h  |   24 ++-
 net/dccp/dccp.h   |   10 ++-
 net/dccp/feat.c   |   29 
 net/dccp/feat.h   |   26 ---
 net/dccp/input.c  |   28 +---
 net/dccp/ipv4.c   |8 +-
 net/dccp/ipv6.c   |8 +-
 net/dccp/minisocks.c  |   33 +
 net/dccp/options.c|  126 +-
 net/dccp/output.c |   21 +-
 net/dccp/proto.c  |   34 +++---
 12 files changed, 208 insertions(+), 145 deletions(-)
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/12] [DCCP]: Allow to parse options on Request Sockets

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

The option parsing code currently only parses on full sk's. This causes a 
problem for
options sent during the initial handshake (in particular timestamps and 
feature-negotiation
options). Therefore, this patch extends the option parsing code with an 
additional argument
for request_socks: if it is non-NULL, options are parsed on the request socket, 
otherwise
the normal path (parsing on the sk) is used.

Subsequent patches, which implement feature negotiation during connection 
setup, make use
of this facility.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/linux/dccp.h |5 +++--
 net/dccp/input.c |6 +++---
 net/dccp/ipv4.c  |8 
 net/dccp/ipv6.c  |8 
 net/dccp/options.c   |   34 +++---
 5 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index c676021..7214031 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -407,8 +407,6 @@ struct dccp_opt_pend {
 
 extern void dccp_minisock_init(struct dccp_minisock *dmsk);
 
-extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb);
-
 struct dccp_request_sock {
struct inet_request_sock dreq_inet_rsk;
__u64dreq_iss;
@@ -423,6 +421,9 @@ static inline struct dccp_request_sock *dccp_rsk(const 
struct request_sock *req)
 
 extern struct inet_timewait_death_row dccp_death_row;
 
+extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
+ struct sk_buff *skb);
+
 struct dccp_options_received {
u32 dccpor_ndp; /* only 24 bits */
u32 dccpor_timestamp;
diff --git a/net/dccp/input.c b/net/dccp/input.c
index dacd4fd..08392ed 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -369,7 +369,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff 
*skb,
if (dccp_check_seqno(sk, skb))
goto discard;
 
-   if (dccp_parse_options(sk, skb))
+   if (dccp_parse_options(sk, NULL, skb))
goto discard;
 
if (DCCP_SKB_CB(skb)-dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
@@ -427,7 +427,7 @@ static int dccp_rcv_request_sent_state_process(struct sock 
*sk,
goto out_invalid_packet;
}
 
-   if (dccp_parse_options(sk, skb))
+   if (dccp_parse_options(sk, NULL, skb))
goto out_invalid_packet;
 
/* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
@@ -609,7 +609,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff 
*skb,
/*
 * Step 8: Process options and mark acknowledgeable
 */
-   if (dccp_parse_options(sk, skb))
+   if (dccp_parse_options(sk, NULL, skb))
goto discard;
 
if (dcb-dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index db17b83..02fc91c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -600,11 +600,12 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff 
*skb)
if (req == NULL)
goto drop;
 
-   if (dccp_parse_options(sk, skb))
-   goto drop_and_free;
-
dccp_reqsk_init(req, skb);
 
+   dreq = dccp_rsk(req);
+   if (dccp_parse_options(sk, dreq, skb))
+   goto drop_and_free;
+
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
 
@@ -621,7 +622,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff 
*skb)
 * In fact we defer setting S.GSR, S.SWL, S.SWH to
 * dccp_create_openreq_child.
 */
-   dreq = dccp_rsk(req);
dreq-dreq_isr = dcb-dccpd_seq;
dreq-dreq_iss = dccp_v4_init_sequence(skb);
dreq-dreq_service = service;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index a08e2cb..f42b75c 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -415,11 +415,12 @@ static int dccp_v6_conn_request(struct sock *sk, struct 
sk_buff *skb)
if (req == NULL)
goto drop;
 
-   if (dccp_parse_options(sk, skb))
-   goto drop_and_free;
-
dccp_reqsk_init(req, skb);
 
+   dreq = dccp_rsk(req);
+   if (dccp_parse_options(sk, dreq, skb))
+   goto drop_and_free;
+
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
 
@@ -449,7 +450,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct 
sk_buff *skb)
 *   In fact we defer setting S.GSR, S.SWL, S.SWH to
 *   dccp_create_openreq_child.
 */
-   dreq = dccp_rsk(req);
dreq-dreq_isr = dcb-dccpd_seq;
dreq-dreq_iss = dccp_v6_init_sequence(skb);
dreq-dreq_service = service

[PATCH 09/12] [DCCP]: Support inserting options during the 3-way handshake

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This provides a separate routine to insert options during the initial handshake.
The main purpose is to conduct feature negotiation, for the moment the only user
is the timestamp echo needed for the (CCID3) handshake RTT sample.

Padding of options has been put into a small separate routine, to be shared 
among
the two functions. This could also be used as a generic routine to finish 
inserting
options.

Also removed an `XXX' comment since its content was obvious.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/dccp.h|1 +
 net/dccp/options.c |   32 ++--
 net/dccp/output.c  |2 +-
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3af3320..b138e20 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -428,6 +428,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
 }
 
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
 extern int dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 0c996d8..bedb5da 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -537,6 +537,18 @@ static int dccp_insert_options_feat(struct sock *sk, 
struct sk_buff *skb)
return 0;
 }
 
+/* The length of all options needs to be a multiple of 4 (5.8) */
+static void dccp_insert_option_padding(struct sk_buff *skb)
+{
+   int padding = DCCP_SKB_CB(skb)-dccpd_opt_len % 4;
+
+   if (padding != 0) {
+   padding = 4 - padding;
+   memset(skb_push(skb, padding), 0, padding);
+   DCCP_SKB_CB(skb)-dccpd_opt_len += padding;
+   }
+}
+
 int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 {
struct dccp_sock *dp = dccp_sk(sk);
@@ -580,18 +592,18 @@ int dccp_insert_options(struct sock *sk, struct sk_buff 
*skb)
dccp_insert_option_timestamp_echo(dp, NULL, skb))
return -1;
 
-   /* XXX: insert other options when appropriate */
+   dccp_insert_option_padding(skb);
+   return 0;
+}
 
-   if (DCCP_SKB_CB(skb)-dccpd_opt_len != 0) {
-   /* The length of all options has to be a multiple of 4 */
-   int padding = DCCP_SKB_CB(skb)-dccpd_opt_len % 4;
+int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff 
*skb)
+{
+   DCCP_SKB_CB(skb)-dccpd_opt_len = 0;
 
-   if (padding != 0) {
-   padding = 4 - padding;
-   memset(skb_push(skb, padding), 0, padding);
-   DCCP_SKB_CB(skb)-dccpd_opt_len += padding;
-   }
-   }
+   if (dreq-dreq_timestamp_echo != 0 
+   dccp_insert_option_timestamp_echo(NULL, dreq, skb))
+   return -1;
 
+   dccp_insert_option_padding(skb);
return 0;
 }
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b2e1791..5589a5e 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -303,7 +303,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct 
dst_entry *dst,
DCCP_SKB_CB(skb)-dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)-dccpd_seq  = dreq-dreq_iss;
 
-   if (dccp_insert_options(sk, skb)) {
+   if (dccp_insert_options_rsk(dreq, skb)) {
kfree_skb(skb);
return NULL;
}
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/12] [DCCP]: Add (missing) option parsing to request_sock processing

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This adds option-parsing code to processing of Acks in the listening state
on request_socks on the server, serving two purposes
 (i)  resolves a FIXME (removed);
 (ii) paves the way for feature-negotiation during connection-setup.

There is an intended subtlety here with regard to dccp_check_req:

 Parsing options happens only after testing whether the received packet is
 a retransmitted Request.  Otherwise, if the Request contained (a possibly
 large number of) feature-negotiation options, recomputing state would have to
 happen each time a retransmitted Request arrives, which opens the door to an
 easy DoS attack.  Since in a genuine retransmission the options should not be
 different from the original, reusing the already computed state seems better.

 The other point is - if there are timestamp options on the Request, they will
 not be answered; which means that in the presence of retransmission (likely
 due to loss and/or other problems), the use of Request/Response RTT sampling
 is suspended, so that startup problems here do not propagate.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/minisocks.c |   12 ++--
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 831b76e..b1d5da6 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -200,10 +200,10 @@ struct sock *dccp_check_req(struct sock *sk, struct 
sk_buff *skb,
struct request_sock **prev)
 {
struct sock *child = NULL;
+   struct dccp_request_sock *dreq = dccp_rsk(req);
 
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)-dccph_type == DCCP_PKT_REQUEST) {
-   struct dccp_request_sock *dreq = dccp_rsk(req);
 
if (after48(DCCP_SKB_CB(skb)-dccpd_seq, dreq-dreq_isr)) {
dccp_pr_debug(Retransmitted REQUEST\n);
@@ -227,22 +227,22 @@ struct sock *dccp_check_req(struct sock *sk, struct 
sk_buff *skb,
goto drop;
 
/* Invalid ACK */
-   if (DCCP_SKB_CB(skb)-dccpd_ack_seq != dccp_rsk(req)-dreq_iss) {
+   if (DCCP_SKB_CB(skb)-dccpd_ack_seq != dreq-dreq_iss) {
dccp_pr_debug(Invalid ACK number: ack_seq=%llu, 
  dreq_iss=%llu\n,
  (unsigned long long)
  DCCP_SKB_CB(skb)-dccpd_ack_seq,
- (unsigned long long)
- dccp_rsk(req)-dreq_iss);
+ (unsigned long long) dreq-dreq_iss);
goto drop;
}
 
+   if (dccp_parse_options(sk, dreq, skb))
+goto drop;
+
child = inet_csk(sk)-icsk_af_ops-syn_recv_sock(sk, skb, req, NULL);
if (child == NULL)
goto listen_overflow;
 
-   /* FIXME: deal with options */
-
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
inet_csk_reqsk_queue_add(sk, req, child);
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/12] [DCCP]: Ignore feature negotiation on Data packets

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This implements [RFC 4340, p. 32]: any feature negotiation options received
on DCCP-Data packets MUST be ignored.

Also added a FIXME for further processing, since the code currently (wrongly)
classifies empty Confirm options as invalid - this needs to be resolved in
a separate patch.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/options.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/net/dccp/options.c b/net/dccp/options.c
index bedb5da..d2a84a2 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -132,6 +132,8 @@ int dccp_parse_options(struct sock *sk, struct 
dccp_request_sock *dreq,
case DCCPO_CHANGE_L:
/* fall through */
case DCCPO_CHANGE_R:
+   if (pkt_type == DCCP_PKT_DATA)
+   break;
if (len  2)
goto out_invalid_option;
rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
@@ -148,7 +150,9 @@ int dccp_parse_options(struct sock *sk, struct 
dccp_request_sock *dreq,
case DCCPO_CONFIRM_L:
/* fall through */
case DCCPO_CONFIRM_R:
-   if (len  2)
+   if (pkt_type == DCCP_PKT_DATA)
+   break;
+   if (len  2)/* FIXME this disallows empty confirm */
goto out_invalid_option;
if (dccp_feat_confirm_recv(sk, opt, *value,
   value + 1, len - 1))
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/12] [DCCP]: Perform SHUT_RD and SHUT_WR on receiving close

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This patch performs two changes:

1) Close the write-end in addition to the read-end when a fin-like segment
  (Close or CloseReq) is received by DCCP. This accounts for the fact that DCCP,
  in contrast to TCP, does not have a half-close. RFC 4340 says in this respect
  that when a fin-like segment has been sent there is no guarantee at all that
  any   further data will be processed.
  Thus this patch performs SHUT_WR in addition to the SHUT_RD when a fin-like
  segment is encountered.

2) Minor change: I noted that code appears twice in different places and think 
it
   makes sense to put this into a self-contained function (dccp_enqueue()).

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/input.c |   22 +++---
 1 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/net/dccp/input.c b/net/dccp/input.c
index decf2f2..dacd4fd 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -22,16 +22,27 @@
 /* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 
*/
 int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8;
 
-static void dccp_fin(struct sock *sk, struct sk_buff *skb)
+static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb)
 {
-   sk-sk_shutdown |= RCV_SHUTDOWN;
-   sock_set_flag(sk, SOCK_DONE);
__skb_pull(skb, dccp_hdr(skb)-dccph_doff * 4);
__skb_queue_tail(sk-sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
sk-sk_data_ready(sk, 0);
 }
 
+static void dccp_fin(struct sock *sk, struct sk_buff *skb)
+{
+   /*
+* On receiving Close/CloseReq, both RD/WR shutdown are performed.
+* RFC 4340, 8.3 says that we MAY send further Data/DataAcks after
+* receiving the closing segment, but there is no guarantee that such
+* data will be processed at all.
+*/
+   sk-sk_shutdown = SHUTDOWN_MASK;
+   sock_set_flag(sk, SOCK_DONE);
+   dccp_enqueue_skb(sk, skb);
+}
+
 static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
 {
int queued = 0;
@@ -282,10 +293,7 @@ static int __dccp_rcv_established(struct sock *sk, struct 
sk_buff *skb,
 * - sk_shutdown == RCV_SHUTDOWN, use Code 1, Not Listening
 * - sk_receive_queue is full, use Code 2, Receive Buffer
 */
-   __skb_pull(skb, dh-dccph_doff * 4);
-   __skb_queue_tail(sk-sk_receive_queue, skb);
-   skb_set_owner_r(skb, sk);
-   sk-sk_data_ready(sk, 0);
+   dccp_enqueue_skb(sk, skb);
return 0;
case DCCP_PKT_ACK:
goto discard;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/12] [DCCP]: Use maximum-RTO backoff from DCCP spec

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This removes another Fixme, using the TCP maximum RTO rather than the value
specified by the DCCP specification. Across the sections in RFC 4340, 64
seconds is consistently suggested as maximum RTO backoff value; and this is
the value which is now used.

I have checked both termination cases for retransmissions of Close/CloseReq:
with the default value 15 of `retries2', and an initial icsk_retransmit = 0,
it takes about 614 seconds to declare a non-responding peer as dead, after
which the final terminating Reset is sent. With the TCP maximum RTO value of
120 seconds it takes (as might be expected) almost twice as long, about 23
minutes.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/dccp.h |9 -
 1 files changed, 8 insertions(+), 1 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 07dcbe7..3af3320 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -72,7 +72,14 @@ extern void dccp_time_wait(struct sock *sk, int state, int 
timeo);
 /* RFC 1122, 4.2.3.1 initial RTO value */
 #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
 
-#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+/*
+ * The maximum back-off value for retransmissions. This is needed for
+ *  - retransmitting client-Requests (sec. 8.1.1),
+ *  - retransmitting Close/CloseReq when closing (sec. 8.3),
+ *  - feature-negotiation retransmission (sec. 6.6.3),
+ *  - Acks in client-PARTOPEN state (sec. 8.1.5).
+ */
+#define DCCP_RTO_MAX ((unsigned)(64 * HZ))
 
 /*
  * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 
3.4
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/12] [DCCP]: Support for server holding timewait state

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This adds a socket option and signalling support for the case where the server
holds timewait state on closing the connection, as described in RFC 4340, 8.3.

Since holding timewait state at the server is the non-usual case, it is enabled
via a socket option. Documentation for this socket option has been added.

The setsockopt statement has been made resilient against different possible 
cases
of expressing boolean `true' values using a suggestion by Ian McDonald.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 Documentation/networking/dccp.txt |6 ++
 include/linux/dccp.h  |3 +++
 net/dccp/output.c |6 --
 net/dccp/proto.c  |   13 -
 4 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/dccp.txt 
b/Documentation/networking/dccp.txt
index d76905a..39131a3 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -57,6 +57,12 @@ can be set before calling bind().
 DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
 size (application payload size) in bytes, see RFC 4340, section 14.
 
+DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
+timewait state when closing the connection (RFC 4340, 8.3). The usual case is
+that the closing server sends a CloseReq, whereupon the client holds timewait
+state. When this boolean socket option is on, the server sends a Close instead
+and will enter TIMEWAIT. This option must be set after accept() returns.
+
 DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the
 partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums
 always cover the entire packet and that only fully covered application data is
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 312b989..c676021 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -205,6 +205,7 @@ struct dccp_so_feat {
 #define DCCP_SOCKOPT_CHANGE_L  3
 #define DCCP_SOCKOPT_CHANGE_R  4
 #define DCCP_SOCKOPT_GET_CUR_MPS   5
+#define DCCP_SOCKOPT_SERVER_TIMEWAIT   6
 #define DCCP_SOCKOPT_SEND_CSCOV10
 #define DCCP_SOCKOPT_RECV_CSCOV11
 #define DCCP_SOCKOPT_CCID_RX_INFO  128
@@ -492,6 +493,7 @@ struct dccp_ackvec;
  * @dccps_role - role of this sock, one of %dccp_role
  * @dccps_hc_rx_insert_options - receiver wants to add options when acking
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
+ * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 
8.3)
  * @dccps_xmit_timer - timer for when CCID is not ready to send
  * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
  */
@@ -528,6 +530,7 @@ struct dccp_sock {
enum dccp_role  dccps_role:2;
__u8dccps_hc_rx_insert_options:1;
__u8dccps_hc_tx_insert_options:1;
+   __u8dccps_server_timewait:1;
struct timer_list   dccps_xmit_timer;
 };
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index e97584a..b2e1791 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -567,8 +567,10 @@ void dccp_send_close(struct sock *sk, const int active)
 
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, sk-sk_prot-max_header);
-   DCCP_SKB_CB(skb)-dccpd_type = dp-dccps_role == DCCP_ROLE_CLIENT ?
-   DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;
+   if (dp-dccps_role == DCCP_ROLE_SERVER  !dp-dccps_server_timewait)
+   DCCP_SKB_CB(skb)-dccpd_type = DCCP_PKT_CLOSEREQ;
+   else
+   DCCP_SKB_CB(skb)-dccpd_type = DCCP_PKT_CLOSE;
 
if (active) {
dccp_write_xmit(sk, 1);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 8a73c8f..cc87c50 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -551,6 +551,12 @@ static int do_dccp_setsockopt(struct sock *sk, int level, 
int optname,
 (struct dccp_so_feat 
__user *)
 optval);
break;
+   case DCCP_SOCKOPT_SERVER_TIMEWAIT:
+   if (dp-dccps_role != DCCP_ROLE_SERVER)
+   err = -EOPNOTSUPP;
+   else
+   dp-dccps_server_timewait = (val != 0);
+   break;
case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
if (val  0 || val  15)
err = -EINVAL;
@@ -653,6 +659,10 @@ static int do_dccp_getsockopt(struct sock *sk, int level, 
int optname,
val = dp-dccps_mss_cache;
len = sizeof(val

[PATCH 08/12] [DCCP]: Handle timestamps on Request/Response exchange separately

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

In DCCP, timestamps can occur on packets anytime, CCID3 uses a timestamp(/echo) 
on the Request/Response
exchange. This patch addresses the following situation:
* timestamps are recorded on the listening socket;
* Responses are sent from dccp_request_sockets;
* suppose two connections reach the listening socket with very small 
time in between:
* the first timestamp value gets overwritten by the second connection 
request.

This is not really good, so this patch separates timestamps into
 * those which are received by the server during the initial handshake (on 
dccp_request_sock);
 * those which are received by the client or the client after connection 
establishment.

As before, a timestamp of 0 is regarded as indicating that no (meaningful) 
timestamp has been
received (in addition, a warning message is printed if hosts send 0-valued 
timestamps).

The timestamp-echoing now works as follows:
 * when a timestamp is present on the initial Request, it is placed into dreq, 
due to the
   call to dccp_parse_options in dccp_v{4,6}_conn_request;
 * when a timestamp is present on the Ack leading from RESPOND = OPEN, it is 
copied over
   from the request_sock into the child cocket in dccp_create_openreq_child;
 * timestamps received on an (established) dccp_sock are treated as before.

Since Elapsed Time is measured in hundredths of milliseconds (13.2), the new 
dccp_timestamp()
function is used, as it is expected that the time between receiving the 
timestamp and
sending the timestamp echo will be very small against the wrap-around time. As 
a byproduct,
this allows smaller timestamping-time fields.

Furthermore, inserting the Timestamp Echo option has been taken out of the 
block starting with
'!dccp_packet_without_ack()', since Timestamp Echo can be carried on any packet 
(5.8 and 13.3).

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Acked-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/linux/dccp.h |   16 -
 net/dccp/minisocks.c |   21 +++---
 net/dccp/options.c   |   56 -
 3 files changed, 63 insertions(+), 30 deletions(-)

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 7214031..484e45c 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -407,11 +407,23 @@ struct dccp_opt_pend {
 
 extern void dccp_minisock_init(struct dccp_minisock *dmsk);
 
+/**
+ * struct dccp_request_sock  -  represent DCCP-specific connection request
+ * @dreq_inet_rsk: structure inherited from
+ * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1)
+ * @dreq_isr: initial sequence number received on the Request
+ * @dreq_service: service code present on the Request (there is just one)
+ * The following two fields are analogous to the ones in dccp_sock:
+ * @dreq_timestamp_echo: last received timestamp to echo (13.1)
+ * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo
+ */
 struct dccp_request_sock {
struct inet_request_sock dreq_inet_rsk;
__u64dreq_iss;
__u64dreq_isr;
__be32   dreq_service;
+   __u32dreq_timestamp_echo;
+   __u32dreq_timestamp_time;
 };
 
 static inline struct dccp_request_sock *dccp_rsk(const struct request_sock 
*req)
@@ -477,8 +489,8 @@ struct dccp_ackvec;
  * @dccps_gar - greatest valid ack number received on a non-Sync; initialized 
to %dccps_iss
  * @dccps_service - first (passive sock) or unique (active sock) service code
  * @dccps_service_list - second .. last service code on passive socket
- * @dccps_timestamp_time - time of latest TIMESTAMP option
  * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
+ * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo
  * @dccps_l_ack_ratio - feature-local Ack Ratio
  * @dccps_r_ack_ratio - feature-remote Ack Ratio
  * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
@@ -514,8 +526,8 @@ struct dccp_sock {
__u64   dccps_gar;
__be32  dccps_service;
struct dccp_service_list*dccps_service_list;
-   ktime_t dccps_timestamp_time;
__u32   dccps_timestamp_echo;
+   __u32   dccps_timestamp_time;
__u16   dccps_l_ack_ratio;
__u16   dccps_r_ack_ratio;
__u16   dccps_pcslen;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b1d5da6..027d181 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -117,11 +117,13 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
struct dccp_sock *newdp = dccp_sk(newsk

[PATCH 1/3] [DCCP]: Documentation for CCID operations

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This adds documentation for the ccid_operations structure.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccid.h |   35 ---
 1 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index c65cb24..e3cdd8a 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -23,14 +23,35 @@
 
 struct tcp_info;
 
+/**
+ *  struct ccid_operations  -  Interface to Congestion-Control Infrastructure
+ *
+ *  @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.)
+ *  @ccid_name: alphabetical identifier string for @ccid_id
+ *  @ccid_owner: module which implements/owns this CCID
+ *  @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection
+ *  @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection 
socket
+ *
+ *  @ccid_hc_{r,t}x_init: CCID-specific initialisation routine (before startup)
+ *  @ccid_hc_{r,t}x_exit: CCID-specific cleanup routine (before destruction)
+ *  @ccid_hc_rx_packet_recv: implements the HC-receiver side
+ *  @ccid_hc_{r,t}x_parse_options: parsing routine for CCID/HC-specific options
+ *  @ccid_hc_{r,t}x_insert_options: insert routine for CCID/HC-specific options
+ *  @ccid_hc_tx_packet_recv: implements feedback processing for the HC-sender
+ *  @ccid_hc_tx_send_packet: implements the sending part of the HC-sender
+ *  @ccid_hc_tx_packet_sent: does accounting for packets in flight by HC-sender
+ *  @ccid_hc_{r,t}x_get_info: INET_DIAG information for HC-receiver/sender
+ *  @ccid_hc_{r,t}x_getsockopt: socket options specific to HC-receiver/sender
+ */
 struct ccid_operations {
-   unsigned char   ccid_id;
-   const char  *ccid_name;
-   struct module   *ccid_owner;
-   struct kmem_cache   *ccid_hc_rx_slab;
-   __u32   ccid_hc_rx_obj_size;
-   struct kmem_cache   *ccid_hc_tx_slab;
-   __u32   ccid_hc_tx_obj_size;
+   unsigned char   ccid_id;
+   const char  *ccid_name;
+   struct module   *ccid_owner;
+   struct kmem_cache   *ccid_hc_rx_slab,
+   *ccid_hc_tx_slab;
+   __u32   ccid_hc_rx_obj_size,
+   ccid_hc_tx_obj_size;
+   /* Interface Routines */
int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
void(*ccid_hc_rx_exit)(struct sock *sk);
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] [CCID]: More informative registration

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

The patch makes the registration messages of CCID 2/3 a bit more
informative: instead of repeating the CCID number as currently done,

CCID: Registered CCID 2 (ccid2)  or
CCID: Registered CCID 3 (ccid3),

the descriptive names of the CCID's (from RFCs) are now used:

CCID: Registered CCID 2 (TCP-like) and
CCID: Registered CCID 3 (TCP-Friendly Rate Control).

To allow spaces in the name, the slab name string has been changed to
refer to the numeric CCID identifier, using the same format as before.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccid.c|8 
 net/dccp/ccids/ccid2.c |2 +-
 net/dccp/ccids/ccid3.c |2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index c45088b..4809753 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -92,15 +92,15 @@ int ccid_register(struct ccid_operations *ccid_ops)
 
ccid_ops-ccid_hc_rx_slab =
ccid_kmem_cache_create(ccid_ops-ccid_hc_rx_obj_size,
-  %s_hc_rx_sock,
-  ccid_ops-ccid_name);
+  ccid%u_hc_rx_sock,
+  ccid_ops-ccid_id);
if (ccid_ops-ccid_hc_rx_slab == NULL)
goto out;
 
ccid_ops-ccid_hc_tx_slab =
ccid_kmem_cache_create(ccid_ops-ccid_hc_tx_obj_size,
-  %s_hc_tx_sock,
-  ccid_ops-ccid_name);
+  ccid%u_hc_tx_sock,
+  ccid_ops-ccid_id);
if (ccid_ops-ccid_hc_tx_slab == NULL)
goto out_free_rx_slab;
 
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 48eeb44..b5b52eb 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -770,7 +770,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
 
 static struct ccid_operations ccid2 = {
.ccid_id= DCCPC_CCID2,
-   .ccid_name  = ccid2,
+   .ccid_name  = TCP-like,
.ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size= sizeof(struct ccid2_hc_tx_sock),
.ccid_hc_tx_init= ccid2_hc_tx_init,
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index a818a1e..876c747 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -943,7 +943,7 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const 
int optname, int len,
 
 static struct ccid_operations ccid3 = {
.ccid_id   = DCCPC_CCID3,
-   .ccid_name = ccid3,
+   .ccid_name = TCP-Friendly Rate Control,
.ccid_owner= THIS_MODULE,
.ccid_hc_tx_obj_size   = sizeof(struct ccid3_hc_tx_sock),
.ccid_hc_tx_init   = ccid3_hc_tx_init,
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] [DCCP]: Introducing CCMPS

2007-12-13 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This introduces a CCMPS field for setting a CCID-specific upper bound on the 
application payload
size, as is defined in RFC 4340, section 14.

Only the TX CCID is considered in setting this limit, since the RX CCID 
generates comparatively
small (DCCP-Ack) feedback packets. The CCMPS field includes network and 
transport layer header
lengths. The only current CCMPS customer is CCID4 (via RFC 4828).

A wrapper is used to allow querying the CCMPS even at times where the CCID 
modules may not have
been fully negotiated yet.

In dccp_sync_mss() the variable `mss_now' has been renamed into `cur_mps', to 
reflect that we are
dealing with an MPS, but not an MSS.
Since the DCCP code closely follows the TCP code, the identifiers 
`dccp_sync_mss' and
`dccps_mss_cache' have been kept, as they have direct TCP counterparts.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccid.h   |2 ++
 net/dccp/output.c |   30 +++---
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index e3cdd8a..fdeae7b 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -27,6 +27,7 @@ struct tcp_info;
  *  struct ccid_operations  -  Interface to Congestion-Control Infrastructure
  *
  *  @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.)
+ *  @ccid_ccmps: the CCMPS including network/transport headers (0 when 
disabled)
  *  @ccid_name: alphabetical identifier string for @ccid_id
  *  @ccid_owner: module which implements/owns this CCID
  *  @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection
@@ -45,6 +46,7 @@ struct tcp_info;
  */
 struct ccid_operations {
unsigned char   ccid_id;
+   __u32   ccid_ccmps;
const char  *ccid_name;
struct module   *ccid_owner;
struct kmem_cache   *ccid_hc_rx_slab,
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 5589a5e..3b763db 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -133,15 +133,31 @@ static int dccp_transmit_skb(struct sock *sk, struct 
sk_buff *skb)
return -ENOBUFS;
 }
 
+/**
+ * dccp_determine_ccmps  -  Find out about CCID-specfic packet-size limits
+ * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.),
+ * since the RX CCID is restricted to feedback packets (Acks), which are small
+ * in comparison with the data traffic. A value of 0 means no current CCMPS.
+ */
+static u32 dccp_determine_ccmps(const struct dccp_sock *dp)
+{
+   const struct ccid *tx_ccid = dp-dccps_hc_tx_ccid;
+
+   if (tx_ccid == NULL || tx_ccid-ccid_ops == NULL)
+   return 0;
+   return tx_ccid-ccid_ops-ccid_ccmps;
+}
+
 unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 {
struct inet_connection_sock *icsk = inet_csk(sk);
struct dccp_sock *dp = dccp_sk(sk);
-   int mss_now = (pmtu - icsk-icsk_af_ops-net_header_len -
-  sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext));
+   u32 ccmps = dccp_determine_ccmps(dp);
+   int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
 
-   /* Now subtract optional transport overhead */
-   mss_now -= icsk-icsk_ext_hdr_len;
+   /* Account for header lengths and IPv4/v6 option overhead */
+   cur_mps -= (icsk-icsk_af_ops-net_header_len + icsk-icsk_ext_hdr_len +
+   sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
 
/*
 * FIXME: this should come from the CCID infrastructure, where, say,
@@ -151,13 +167,13 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 * make it a multiple of 4
 */
 
-   mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
+   cur_mps -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
 
/* And store cached results */
icsk-icsk_pmtu_cookie = pmtu;
-   dp-dccps_mss_cache = mss_now;
+   dp-dccps_mss_cache = cur_mps;
 
-   return mss_now;
+   return cur_mps;
 }
 
 EXPORT_SYMBOL_GPL(dccp_sync_mss);
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHES 0/3]: DCCP patches for 2.6.25

2007-12-13 Thread Arnaldo Carvalho de Melo
Hi David,

Please consider pulling from:

master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25

Best Regards,

- Arnaldo

 net/dccp/ccid.c|8 
 net/dccp/ccid.h|   37 ++---
 net/dccp/ccids/ccid2.c |2 +-
 net/dccp/ccids/ccid3.c |2 +-
 net/dccp/output.c  |   30 +++---
 5 files changed, 59 insertions(+), 20 deletions(-)
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/7] [TFRC]: Loss interval code needs the macros/inlines that were moved

2007-12-12 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This moves the inlines (which were previously declared as macros) back into
packet_history.h since the loss detection code needs to be able to read entries
from the RX history in order to create the relevant loss entries: it needs at
least tfrc_rx_hist_loss_prev() and tfrc_rx_hist_last_rcv(), which in turn
require the definition of the other inlines (macros).

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/lib/packet_history.c |   35 ---
 net/dccp/ccids/lib/packet_history.h |   35 +++
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/net/dccp/ccids/lib/packet_history.c 
b/net/dccp/ccids/lib/packet_history.c
index 727b17d..dd2cf2d 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -151,23 +151,6 @@ void tfrc_rx_packet_history_exit(void)
}
 }
 
-/**
- * tfrc_rx_hist_index - index to reach n-th entry after loss_start
- */
-static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
-{
-   return (h-loss_start + n)  TFRC_NDUPACK;
-}
-
-/**
- * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
- */
-static inline struct tfrc_rx_hist_entry *
-   tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
-{
-   return h-ring[tfrc_rx_hist_index(h, h-loss_count)];
-}
-
 void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 const struct sk_buff *skb,
 const u32 ndp)
@@ -183,24 +166,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
 
-/**
- * tfrc_rx_hist_entry - return the n-th history entry after loss_start
- */
-static inline struct tfrc_rx_hist_entry *
-   tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
-{
-   return h-ring[tfrc_rx_hist_index(h, n)];
-}
-
-/**
- * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was 
detected
- */
-static inline struct tfrc_rx_hist_entry *
-   tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
-{
-   return h-ring[h-loss_start];
-}
-
 /* has the packet contained in skb been seen before? */
 int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
 {
diff --git a/net/dccp/ccids/lib/packet_history.h 
b/net/dccp/ccids/lib/packet_history.h
index 3dfd182..e58b0fc 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -84,6 +84,41 @@ struct tfrc_rx_hist {
 #define rtt_sample_prev  loss_start
 };
 
+/**
+ * tfrc_rx_hist_index - index to reach n-th entry after loss_start
+ */
+static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
+{
+   return (h-loss_start + n)  TFRC_NDUPACK;
+}
+
+/**
+ * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
+ */
+static inline struct tfrc_rx_hist_entry *
+   tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
+{
+   return h-ring[tfrc_rx_hist_index(h, h-loss_count)];
+}
+
+/**
+ * tfrc_rx_hist_entry - return the n-th history entry after loss_start
+ */
+static inline struct tfrc_rx_hist_entry *
+   tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const 
u8 n)
+{
+   return h-ring[tfrc_rx_hist_index(h, n)];
+}
+
+/**
+ * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was 
detected
+ */
+static inline struct tfrc_rx_hist_entry *
+   tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
+{
+   return h-ring[h-loss_start];
+}
+
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
const struct sk_buff *skb, const u32 ndp);
 
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/7] [TFRC]: CCID3 (and CCID4) needs to access these inlines

2007-12-12 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This moves two inlines back to packet_history.h: these are not private
to packet_history.c, but are needed by CCID3/4 to detect whether a new
loss is indicated, or whether a loss is already pending.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/lib/packet_history.c |   26 --
 net/dccp/ccids/lib/packet_history.h |   35 +++
 2 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/net/dccp/ccids/lib/packet_history.c 
b/net/dccp/ccids/lib/packet_history.c
index 5b10a1e..20af1a6 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -191,32 +191,6 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct 
sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
 
-/* initialise loss detection and disable RTT sampling */
-static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
-{
-   h-loss_count = 1;
-}
-
-/* indicate whether previously a packet was detected missing */
-static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
-{
-   return h-loss_count;
-}
-
-/* any data packets missing between last reception and skb ? */
-int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
-   const struct sk_buff *skb, u32 ndp)
-{
-   int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)-tfrchrx_seqno,
-DCCP_SKB_CB(skb)-dccpd_seq);
-
-   if (delta  1  ndp  delta)
-   tfrc_rx_hist_loss_indicated(h);
-
-   return tfrc_rx_hist_loss_pending(h);
-}
-EXPORT_SYMBOL_GPL(tfrc_rx_hist_new_loss_indicated);
-
 static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
 {
const u8 idx_a = tfrc_rx_hist_index(h, a),
diff --git a/net/dccp/ccids/lib/packet_history.h 
b/net/dccp/ccids/lib/packet_history.h
index 24edd8d..c7eeda4 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -118,16 +118,43 @@ static inline struct tfrc_rx_hist_entry *
return h-ring[h-loss_start];
 }
 
+/* initialise loss detection and disable RTT sampling */
+static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
+{
+   h-loss_count = 1;
+}
+
+/* indicate whether previously a packet was detected missing */
+static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
+{
+   return h-loss_count;
+}
+
+/* any data packets missing between last reception and skb ? */
+static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
+ const struct sk_buff *skb,
+ u32 ndp)
+{
+   int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)-tfrchrx_seqno,
+DCCP_SKB_CB(skb)-dccpd_seq);
+
+   if (delta  1  ndp  delta)
+   tfrc_rx_hist_loss_indicated(h);
+
+   return tfrc_rx_hist_loss_pending(h);
+}
+
 extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
const struct sk_buff *skb, const u32 ndp);
 
 extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
-extern int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
-  const struct sk_buff *skb, u32 ndp);
+
 struct tfrc_loss_hist;
-extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *, struct tfrc_loss_hist *,
+extern int  tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
+   struct tfrc_loss_hist *lh,
struct sk_buff *skb, u32 ndp,
-   u32 (*first_li)(struct sock *), struct sock *);
+   u32 (*first_li)(struct sock *sk),
+   struct sock *sk);
 extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
   const struct sk_buff *skb);
 extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/7] [CCID3]: Redundant debugging output / documentation

2007-12-12 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

Each time feedback is sent two lines are printed:

ccid3_hc_rx_send_feedback: client ... - entry
ccid3_hc_rx_send_feedback: Interval ...usec, X_recv=..., 1/p=...

The first line is redundant and thus removed.

Further, documentation of ccid3_hc_rx_sock (capitalisation) is made consistent.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c |2 --
 net/dccp/ccids/ccid3.h |4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 60fcb31..b92069b 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -685,8 +685,6 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
ktime_t now;
s64 delta = 0;
 
-   ccid3_pr_debug(%s(%p) - entry \n, dccp_role(sk), sk);
-
if (unlikely(hcrx-ccid3hcrx_state == TFRC_RSTATE_TERM))
return;
 
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 3c33dc6..6ceeb80 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -135,9 +135,9 @@ enum ccid3_hc_rx_states {
  *
  *  @ccid3hcrx_x_recv  -  Receiver estimate of send rate (RFC 3448 4.3)
  *  @ccid3hcrx_rtt  -  Receiver estimate of rtt (non-standard)
- *  @ccid3hcrx_p  -  current loss event rate (RFC 3448 5.4)
+ *  @ccid3hcrx_p  -  Current loss event rate (RFC 3448 5.4)
  *  @ccid3hcrx_last_counter  -  Tracks window counter (RFC 4342, 8.1)
- *  @ccid3hcrx_state  -  receiver state, one of %ccid3_hc_rx_states
+ *  @ccid3hcrx_state  -  Receiver state, one of %ccid3_hc_rx_states
  *  @ccid3hcrx_bytes_recv  -  Total sum of DCCP payload bytes
  *  @ccid3hcrx_tstamp_last_feedback  -  Time at which last feedback was sent
  *  @ccid3hcrx_tstamp_last_ack  -  Time at which last feedback was sent
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHES 0/7]: DCCP patches for 2.6.25

2007-12-12 Thread Arnaldo Carvalho de Melo
Hi David,

Please consider pulling from:

master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25

Best Regards,

- Arnaldo

 b/net/dccp/ccids/ccid3.c  |2 
 b/net/dccp/ccids/ccid3.h  |5 
 b/net/dccp/ccids/lib/loss_interval.c  |  161 ++-
 b/net/dccp/ccids/lib/loss_interval.h  |   56 ++
 b/net/dccp/ccids/lib/packet_history.c |   68 +++-
 b/net/dccp/ccids/lib/packet_history.h |   36 
 b/net/dccp/ccids/lib/tfrc.c   |   32 ++-
 b/net/dccp/ccids/lib/tfrc.h   |4 
 b/net/dccp/dccp.h |8 
 net/dccp/ccids/ccid3.c|   72 +++-
 net/dccp/ccids/ccid3.h|   10 -
 net/dccp/ccids/lib/loss_interval.c|  284 +-
 net/dccp/ccids/lib/loss_interval.h|   11 -
 net/dccp/ccids/lib/packet_history.c   |  279 +
 net/dccp/ccids/lib/packet_history.h   |   47 -
 net/dccp/ccids/lib/tfrc.c |   10 -
 16 files changed, 643 insertions(+), 442 deletions(-)
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/7] [CCID3]: Interface CCID3 code with newer Loss Intervals Database

2007-12-12 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This hooks up the TFRC Loss Interval database with CCID 3 packet reception.
In addition, it makes the CCID-specific computation of the first loss
interval (which requires access to all the guts of CCID3) local to ccid3.c.

The patch also fixes an omission in the DCCP code, that of a default /
fallback RTT value (defined in section 3.4 of RFC 4340 as 0.2 sec); while
at it, the  upper bound of 4 seconds for an RTT sample has  been reduced to
match the initial TCP RTO value of 3 seconds from[RFC 1122, 4.2.3.1].

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c |   72 ++--
 net/dccp/ccids/ccid3.h |   10 ++--
 net/dccp/ccids/lib/loss_interval.c |   18 
 net/dccp/ccids/lib/tfrc.c  |   10 ++--
 net/dccp/dccp.h|7 ++-
 5 files changed, 84 insertions(+), 33 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index b92069b..a818a1e 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1,6 +1,7 @@
 /*
  *  net/dccp/ccids/ccid3.c
  *
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *  Copyright (c) 2005-7 Ian McDonald [EMAIL PROTECTED]
  *
@@ -33,11 +34,7 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include ../ccid.h
 #include ../dccp.h
-#include lib/packet_history.h
-#include lib/loss_interval.h
-#include lib/tfrc.h
 #include ccid3.h
 
 #include asm/unaligned.h
@@ -757,6 +754,46 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, 
struct sk_buff *skb)
return 0;
 }
 
+/** ccid3_first_li  -  Implements [RFC 3448, 6.3.1]
+ *
+ * Determine the length of the first loss interval via inverse lookup.
+ * Assume that X_recv can be computed by the throughput equation
+ * s
+ * X_recv = 
+ *  R * fval
+ * Find some p such that f(p) = fval; return 1/p (scaled).
+ */
+static u32 ccid3_first_li(struct sock *sk)
+{
+   struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+   u32 x_recv, p, delta;
+   u64 fval;
+
+   if (hcrx-ccid3hcrx_rtt == 0) {
+   DCCP_WARN(No RTT estimate available, using fallback RTT\n);
+   hcrx-ccid3hcrx_rtt = DCCP_FALLBACK_RTT;
+   }
+
+   delta = 
ktime_to_us(net_timedelta(hcrx-ccid3hcrx_tstamp_last_feedback));
+   x_recv = scaled_div32(hcrx-ccid3hcrx_bytes_recv, delta);
+   if (x_recv == 0) {  /* would also trigger divide-by-zero */
+   DCCP_WARN(X_recv==0\n);
+   if ((x_recv = hcrx-ccid3hcrx_x_recv) == 0) {
+   DCCP_BUG(stored value of X_recv is zero);
+   return ~0U;
+   }
+   }
+
+   fval = scaled_div(hcrx-ccid3hcrx_s, hcrx-ccid3hcrx_rtt);
+   fval = scaled_div32(fval, x_recv);
+   p = tfrc_calc_x_reverse_lookup(fval);
+
+   ccid3_pr_debug(%s(%p), receive rate=%u bytes/s, implied 
+  loss rate=%u\n, dccp_role(sk), sk, x_recv, p);
+
+   return p == 0 ? ~0U : scaled_div(1, p);
+}
+
 static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
@@ -794,6 +831,14 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, 
struct sk_buff *skb)
/*
 * Handle pending losses and otherwise check for new loss
 */
+   if (tfrc_rx_hist_loss_pending(hcrx-ccid3hcrx_hist) 
+   tfrc_rx_handle_loss(hcrx-ccid3hcrx_hist,
+   hcrx-ccid3hcrx_li_hist,
+   skb, ndp, ccid3_first_li, sk) ) {
+   do_feedback = CCID3_FBACK_PARAM_CHANGE;
+   goto done_receiving;
+   }
+
if (tfrc_rx_hist_new_loss_indicated(hcrx-ccid3hcrx_hist, skb, ndp))
goto update_records;
 
@@ -803,7 +848,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
if (unlikely(!is_data_packet))
goto update_records;
 
-   if (list_empty(hcrx-ccid3hcrx_li_hist)) {  /* no loss so far: p = 0 */
+   if (!tfrc_lh_is_initialised(hcrx-ccid3hcrx_li_hist)) {
const u32 sample = 
tfrc_rx_hist_sample_rtt(hcrx-ccid3hcrx_hist, skb);
/*
 * Empty loss history: no loss so far, hence p stays 0.
@@ -812,6 +857,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, 
struct sk_buff *skb)
 */
if (sample != 0)
hcrx-ccid3hcrx_rtt = tfrc_ewma(hcrx-ccid3hcrx_rtt, 
sample, 9);
+
+   } else if (tfrc_lh_update_i_mean(hcrx-ccid3hcrx_li_hist, skb)) {
+   /*
+* Step (3

[PATCH 3/7] [TFRC]: Ringbuffer to track loss interval history

2007-12-12 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

A ringbuffer-based implementation of loss interval history is easier to
maintain, allocate, and update.

The `swap' routine to keep the RX history sorted is due to and was written
by Arnaldo Carvalho de Melo, simplifying an earlier macro-based variant.

Details:
 * access to the Loss Interval Records via macro wrappers (with safety checks);
 * simplified, on-demand allocation of entries (no extra memory consumption on
   lossless links); cache allocation is local to the module / exported as 
service;
 * provision of RFC-compliant algorithm to re-compute average loss interval;
 * provision of comprehensive, new loss detection algorithm
- support for all cases of loss, including re-ordered/duplicate packets;
- waiting for NDUPACK=3 packets to fill the hole;
- updating loss records when a late-arriving packet fills a hole.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/lib/loss_interval.c  |  161 +-
 net/dccp/ccids/lib/loss_interval.h  |   56 +-
 net/dccp/ccids/lib/packet_history.c |  218 ++-
 net/dccp/ccids/lib/packet_history.h |   11 +-
 net/dccp/ccids/lib/tfrc.h   |3 +
 5 files changed, 435 insertions(+), 14 deletions(-)

diff --git a/net/dccp/ccids/lib/loss_interval.c 
b/net/dccp/ccids/lib/loss_interval.c
index c0a933a..39980d1 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -1,6 +1,7 @@
 /*
  *  net/dccp/ccids/lib/loss_interval.c
  *
+ *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
  *  Copyright (c) 2005-7 Ian McDonald [EMAIL PROTECTED]
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo [EMAIL PROTECTED]
@@ -10,12 +11,7 @@
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  */
-
-#include linux/module.h
 #include net/sock.h
-#include ../../dccp.h
-#include loss_interval.h
-#include packet_history.h
 #include tfrc.h
 
 #define DCCP_LI_HIST_IVAL_F_LENGTH  8
@@ -27,6 +23,54 @@ struct dccp_li_hist_entry {
u32  dccplih_interval;
 };
 
+static struct kmem_cache  *tfrc_lh_slab  __read_mostly;
+/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */
+static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 };
+
+/* implements LIFO semantics on the array */
+static inline u8 LIH_INDEX(const u8 ctr)
+{
+   return (LIH_SIZE - 1 - (ctr % LIH_SIZE));
+}
+
+/* the `counter' index always points at the next entry to be populated */
+static inline struct tfrc_loss_interval *tfrc_lh_peek(struct tfrc_loss_hist 
*lh)
+{
+   return lh-counter ? lh-ring[LIH_INDEX(lh-counter - 1)] : NULL;
+}
+
+/* given i with 0 = i = k, return I_i as per the rfc3448bis notation */
+static inline u32 tfrc_lh_get_interval(struct tfrc_loss_hist *lh, const u8 i)
+{
+   BUG_ON(i = lh-counter);
+   return lh-ring[LIH_INDEX(lh-counter - i - 1)]-li_length;
+}
+
+/*
+ * On-demand allocation and de-allocation of entries
+ */
+static struct tfrc_loss_interval *tfrc_lh_demand_next(struct tfrc_loss_hist 
*lh)
+{
+   if (lh-ring[LIH_INDEX(lh-counter)] == NULL)
+   lh-ring[LIH_INDEX(lh-counter)] = 
kmem_cache_alloc(tfrc_lh_slab,
+   GFP_ATOMIC);
+   return lh-ring[LIH_INDEX(lh-counter)];
+}
+
+void tfrc_lh_cleanup(struct tfrc_loss_hist *lh)
+{
+   if (!tfrc_lh_is_initialised(lh))
+   return;
+
+   for (lh-counter = 0; lh-counter  LIH_SIZE; lh-counter++)
+   if (lh-ring[LIH_INDEX(lh-counter)] != NULL) {
+   kmem_cache_free(tfrc_lh_slab,
+   lh-ring[LIH_INDEX(lh-counter)]);
+   lh-ring[LIH_INDEX(lh-counter)] = NULL;
+   }
+}
+EXPORT_SYMBOL_GPL(tfrc_lh_cleanup);
+
 static struct kmem_cache *dccp_li_cachep __read_mostly;
 
 static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t 
prio)
@@ -98,6 +142,65 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
 
+static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
+{
+   u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0;
+   int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */
+
+   for (i=0; i = k; i++) {
+   i_i = tfrc_lh_get_interval(lh, i);
+
+   if (i  k) {
+   i_tot0 += i_i * tfrc_lh_weights[i];
+   w_tot  += tfrc_lh_weights[i];
+   }
+   if (i  0)
+   i_tot1 += i_i * tfrc_lh_weights[i-1];
+   }
+
+   BUG_ON(w_tot == 0);
+   lh-i_mean = max(i_tot0, i_tot1) / w_tot

Re: [PATCH 8/8] [PATCH v2] [CCID3]: Interface CCID3 code with newer Loss Intervals Database

2007-12-12 Thread Arnaldo Carvalho de Melo
Em Wed, Dec 12, 2007 at 04:56:32PM +, Gerrit Renker escreveu:
 | This time around I'm not doing any reordering, just trying to use your
 | patches as is, but adding this patch as-is produces a kernel that will
 | crash, no?
 | 
 |  The loss history and the RX/TX packet history slabs are all created in
 |  tfrc.c using the three different __init routines of the dccp_tfrc_lib.
 | 
 | Yes, the init routines are called and in turn they create the slab
 | caches, but up to the patch [PATCH 8/8] [PATCH v2] [CCID3]: Interface
 | CCID3 code with newer Loss Intervals Database the new li slab is not
 | being created, no? See what I'm talking?
 | 
 Sorry, there is some weird kind of mix-up going on. Can you please check
 your patch set: it seems this email exchange refers to an older variant.
 In the most recent patch set, the slab is introduced in the patch
 
   [TFRC]: Ringbuffer to track loss interval history
 
 --- a/net/dccp/ccids/lib/loss_interval.c
 +++ b/net/dccp/ccids/lib/loss_interval.c
 @@ -27,6 +23,54 @@ struct dccp_li_hist_entry {
 u32  dccplih_interval;
  };
 
 +static struct kmem_cache  *tfrc_lh_slab  __read_mostly;  /* === */

Yup, this one, is introduced as above but is not initialized at the
module init routine, please see, it should be OK and we can move on:

http://git.kernel.org/?p=linux/kernel/git/acme/net-2.6.25.git;a=commitdiff;h=a925429ce2189b548dc19037d3ebd4ff35ae4af7

 +/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */
 +static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 };
 // ...
 
 And this is 6/8, i.e. before 8/8, cf.
   http://www.mail-archive.com/dccp@vger.kernel.org/msg03000.html
  
 I don't know which tree you are working off, would it be possible to
 check against the test tree
   git://eden-feed.erg.abdn.ac.uk/dccp_exp [dccp]

I'm doing a fresh clone now. But I think that everything is OK after
today's merge request I sent to David.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] [PATCH v2] [CCID3]: Interface CCID3 code with newer Loss Intervals Database

2007-12-11 Thread Arnaldo Carvalho de Melo
Em Tue, Dec 11, 2007 at 09:42:38AM +, Gerrit Renker escreveu:
 | When interfacing we must make sure that ccid3 tfrc_lh_slab is created
 | and then tfrc_li_cachep is not needed. I'm doing this while keeping
 | the structure of the patches, i.e. one introducing, the other removing.
 | But we need to create tfrc_lh_slab if we want the tree to be bisectable.
 | 
 | I'm doing this and keeping your Signed-off-line, please holler if you
 | disagree for some reason.
 If you are just shifting and reordering then that is fine with me. But
 it seems you mean a different patch since in this one there is no slab
 initialisation. 

This time around I'm not doing any reordering, just trying to use your
patches as is, but adding this patch as-is produces a kernel that will
crash, no?

 The loss history and the RX/TX packet history slabs are all created in
 tfrc.c using the three different __init routines of the dccp_tfrc_lib.

Yes, the init routines are called and in turn they create the slab
caches, but up to the patch [PATCH 8/8] [PATCH v2] [CCID3]: Interface
CCID3 code with newer Loss Intervals Database the new li slab is not
being created, no? See what I'm talking?

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Announce]: Test results with latest CCID3 patch set

2007-12-11 Thread Arnaldo Carvalho de Melo
Em Tue, Dec 11, 2007 at 02:21:28PM +, Gerrit Renker escreveu:
 | I am new of this mailing list and I am really interested in the
 | measurements you are performing with DCCP.
 This was more of a regression test, as there had been recent changes in
 the test tree, to see that the kernel (not userspace) still performs in
 a predictable way.
 
 | Which tool are you using ? Are you using Iperf for such measurements ?
 The setup is the one from
   http://www.linux-foundation.org/en/Net:DCCP_Testing#Regression_testing
 and, yes, it uses iperf.  
 
 | Have you ever heard about D-ITG ?
 
 | You can find more information here:
 | http://www.grid.unina.it/software/ITG
 | 
 | I am one of the authors of such platform and I have also
 | performed some very preliminary tests with DCCP.
 | 
 | I would be very glad to have your opinion on that and I'm very interested
 | in improving its features, also with specific regard to the support of
 | transport protocols.
 | 
 It is a very nice tool with many features. I only ran simple tests with it 
 (version 2.6),
 again only as basic sanity tests -- the throughput result was similar to the 
 one tested with
 iperf.
 
 I think that the tool has more to offer and can help improve/extend DCCP 
 testing.
 Here is my list of points, hoping that the others will add theirs, too:
 
  * would be good to have a standardised set of scripts, for 
 comparison/benchmarking
 
  * the built-in VoIP module only works for UDP -- is it possible to port this 
 to DCCP?
  
  * as per previous email, more complex traffic scenarios would be good, in 
 particular
- switching on/off background traffic at times to observe 
 TCP/flow-friendliness
- running multiple DCCP flows in parallel and at overlapping times 

Does this tool records results in a database keyed by kernel
version/buildid for us to use it as a regression tool?

Something that would produce results around these lines:

WARNING: test #23 counter #3 variance bigger than specified since the
last kernel tested (git cset 55ed793afb4a8025d33a8e6a5f2f89d5ac4d8432)!

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] [TFRC]: Loss interval code needs the macros/inlines that were moved

2007-12-10 Thread Arnaldo Carvalho de Melo
Em Mon, Dec 10, 2007 at 11:31:53AM +, Gerrit Renker escreveu:
 |  
 |distcc[24516] ERROR: compile 
 /root/.ccache/packet_his.tmp.aspire.home.net.24512.i on _tiptop failed
 |/usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c: In function 
 '__one_after_loss':
 |/usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c:266: error: 
 lvalue required as unary '' operand
 snip
 | 
 | Because you do it this way:
 | 
 | tfrc_rx_hist_swap(TFRC_RX_HIST_ENTRY(h, 0), TFRC_RX_HIST_ENTRY(h, 3));
 | 
 | I checked and at least in this patch series all uses are of this type,
 | so why not do it using just the indexes, which would be simpler:
 | 
 | tfrc_rx_hist_swap(h, 0, 3);
 | 
 | With this implementation:
 | 
 | static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const int a, const 
 int b)
 | {
 | const int idx_a = tfrc_rx_hist_index(h, a),
 |   int idx_b = tfrc_rx_hist_index(h, b);
 | struct tfrc_rx_hist_entry *tmp = h-ring[idx_a];
 | 
 | h-ring[idx_a] = h-ring[idx_b];
 | h-ring[idx_b] = tmp;
 | }
 | 
 Agreed, that is useful in the present case, since then everything uses
 inlines. The only suggestion I'd like to make is to use `u8' instead of 
 `int' since the indices will have very low values.

Agreed.
 
 There is a related point: you will probably have noticed that loss_interval.c 
 also uses macros. I don't know if you are planning to convert these also into 
 inlines. I think that there would be less benefit in converting these, since
 they are locl to loss_interval.c and mostly serve to improve readability.

In general I'm against using macros for functions, so please always
consider doing things as inlines.

I'll read some more patches today and provide comments as to if I think
it is ok for now to keep it as macros.

 As I have at least one other patch to revise (plus another minor one),
 I'll rework this according to the above. 

Thank you.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] [PATCH v2] [CCID3]: Interface CCID3 code with newer Loss Intervals Database

2007-12-10 Thread Arnaldo Carvalho de Melo
Em Sat, Dec 08, 2007 at 10:06:28AM +, Gerrit Renker escreveu:
 This hooks up the TFRC Loss Interval database with CCID 3 packet reception.
 In addition, it makes the CCID-specific computation of the first loss
 interval (which requires access to all the guts of CCID3) local to ccid3.c.
 
 The patch also fixes an omission in the DCCP code, that of a default /
 fallback RTT value (defined in section 3.4 of RFC 4340 as 0.2 sec); while
 at it, the  upper bound of 4 seconds for an RTT sample has  been reduced to
 match the initial TCP RTO value of 3 seconds from[RFC 1122, 4.2.3.1].
 
 Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
 Signed-off-by: Ian McDonald [EMAIL PROTECTED]

When interfacing we must make sure that ccid3 tfrc_lh_slab is created
and then tfrc_li_cachep is not needed. I'm doing this while keeping
the structure of the patches, i.e. one introducing, the other removing.
But we need to create tfrc_lh_slab if we want the tree to be bisectable.

I'm doing this and keeping your Signed-off-line, please holler if you
disagree for some reason.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/8] [TFRC]: Whitespace cleanups

2007-12-08 Thread Arnaldo Carvalho de Melo
Em Sat, Dec 08, 2007 at 10:06:21AM +, Gerrit Renker escreveu:
 Just some tidy-ups to keep git/quilt happy. Also moved up the
 comment Receiver routines above the first occurrence of RX
 history routines.
 
 Signed-off-by: Gerrit Renker [EMAIL PROTECTED]

Thanks, applied.
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/8] [TFRC]: Put RX/TX initialisation into tfrc.c

2007-12-08 Thread Arnaldo Carvalho de Melo
Em Sat, Dec 08, 2007 at 10:06:22AM +, Gerrit Renker escreveu:
 This separates RX/TX initialisation and puts all packet history / loss 
 intervals
 initialisation into tfrc.c.
 The organisation is uniform: slab declaration - {rx,tx}_init() - 
 {rx,tx}_exit()

NAK, you can't call a __exit marked routine from a __init marked
routine.

- Arnaldo
 
 Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
 ---
  net/dccp/ccids/lib/packet_history.c |   68 --
  net/dccp/ccids/lib/tfrc.c   |   31 
  2 files changed, 55 insertions(+), 44 deletions(-)
 
 diff --git a/net/dccp/ccids/lib/packet_history.c 
 b/net/dccp/ccids/lib/packet_history.c
 index 54cd23e..af0db71 100644
 --- a/net/dccp/ccids/lib/packet_history.c
 +++ b/net/dccp/ccids/lib/packet_history.c
 @@ -57,6 +57,22 @@ struct tfrc_tx_hist_entry {
   */

SNIP

 +
 +void __exit tfrc_tx_packet_history_exit(void)
 +{
 + if (tfrc_tx_hist_slab != NULL) {
 + kmem_cache_destroy(tfrc_tx_hist_slab);
 + tfrc_tx_hist_slab = NULL;
 + }
 +}
 +

SNIP

 diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
 index 3a7a183..20763fa 100644
 --- a/net/dccp/ccids/lib/tfrc.c
 +++ b/net/dccp/ccids/lib/tfrc.c
 @@ -14,27 +14,42 @@ module_param(tfrc_debug, bool, 0444);
  MODULE_PARM_DESC(tfrc_debug, Enable debug messages);
  #endif
  
 +extern int  tfrc_tx_packet_history_init(void);
 +extern void tfrc_tx_packet_history_exit(void);
 +extern int  tfrc_rx_packet_history_init(void);
 +extern void tfrc_rx_packet_history_exit(void);
 +
  extern int  dccp_li_init(void);
  extern void dccp_li_exit(void);
 -extern int packet_history_init(void);
 -extern void packet_history_exit(void);
  
  static int __init tfrc_module_init(void)
  {
   int rc = dccp_li_init();
  
 - if (rc == 0) {
 - rc = packet_history_init();
 - if (rc != 0)
 - dccp_li_exit();
 - }
 + if (rc)
 + goto out;
 +
 + rc = tfrc_tx_packet_history_init();
 + if (rc)
 + goto out_free_loss_intervals;
  
 + rc = tfrc_rx_packet_history_init();
 + if (rc)
 + goto out_free_tx_history;
 + return 0;
 +
 +out_free_tx_history:
 + tfrc_tx_packet_history_exit();
 +out_free_loss_intervals:
 + dccp_li_exit();
 +out:
   return rc;
  }

SNIP
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] [TFRC/CCID3]: Remove now unused functions / function calls

2007-12-08 Thread Arnaldo Carvalho de Melo
Em Sat, Dec 08, 2007 at 10:06:23AM +, Gerrit Renker escreveu:
 This removes two things which now have become redundant:
  1. The function tfrc_rx_hist_entry_delete() is no longer referenced anywhere.
  2. The CCID3 HC-receiver still inserted timestamps, but received timestamps
 are not parsed/referenced/used by the HC-sender, it serves no function.
 
 Signed-off-by: Gerrit Renker [EMAIL PROTECTED]

Thanks, applying as two separate patches.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/8] [TFRC]: Loss interval code needs the macros/inlines that were moved

2007-12-08 Thread Arnaldo Carvalho de Melo
Em Sat, Dec 08, 2007 at 10:06:25AM +, Gerrit Renker escreveu:
 This moves the inlines (which were previously declared as macros) back into 
 packet_history.h since
 the loss detection code needs to be able to read entries from the RX history 
 in order to create the
 relevant loss entries: it needs at least tfrc_rx_hist_loss_prev() and 
 tfrc_rx_hist_last_rcv(), which
 in turn require the definition of the other inlines (macros).
 
 Additionally, inn one case the use of inlines instead of a macro broke the 
 algorithm: rx_hist_swap()
 (introduced in next patch) needs to be able to swap the history entries; when 
 using an inline returning
 a pointer instead, one gets compilation errors such as:
 
   distcc[24516] ERROR: compile 
 /root/.ccache/packet_his.tmp.aspire.home.net.24512.i on _tiptop failed
   /usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c: In function 
 '__one_after_loss':
   /usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c:266: error: lvalue 
 required as unary '' operand
   /usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c:267: error: lvalue 
 required as unary '' operand
   /usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c: In function 
 '__two_after_loss':
   /usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c:298: error: lvalue 
 required as unary '' operand
   /usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c:299: error: lvalue 
 required as unary '' operand
   /usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c:336: error: lvalue 
 required as unary '' operand
   /usr/src/davem-2.6/net/dccp/ccids/lib/packet_history.c:337: error: lvalue 
 required as unary '' operand
   make[4]: *** [net/dccp/ccids/lib/packet_history.o] Error 1
   make[3]: *** [net/dccp/ccids/lib] Error 2
   make[2]: *** [net/dccp/ccids] Error 2
   make[1]: *** [net/dccp/] Error 2
   make: *** [sub-make] Error 2

Because you do it this way:

tfrc_rx_hist_swap(TFRC_RX_HIST_ENTRY(h, 0), TFRC_RX_HIST_ENTRY(h, 3));

I checked and at least in this patch series all uses are of this type,
so why not do it using just the indexes, which would be simpler:

tfrc_rx_hist_swap(h, 0, 3);

With this implementation:

static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const int a, const int b)
{
const int idx_a = tfrc_rx_hist_index(h, a),
  int idx_b = tfrc_rx_hist_index(h, b);
struct tfrc_rx_hist_entry *tmp = h-ring[idx_a];

h-ring[idx_a] = h-ring[idx_b];
h-ring[idx_b] = tmp;
}

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] [TFRC]: The function tfrc_rx_hist_entry_delete() is not used anymore

2007-12-08 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/lib/packet_history.c |5 -
 1 files changed, 0 insertions(+), 5 deletions(-)

diff --git a/net/dccp/ccids/lib/packet_history.c 
b/net/dccp/ccids/lib/packet_history.c
index 54cd23e..af44082 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -151,11 +151,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 }
 EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
 
-static inline void tfrc_rx_hist_entry_delete(struct tfrc_rx_hist_entry *entry)
-{
-   kmem_cache_free(tfrc_rx_hist_slab, entry);
-}
-
 /**
  * tfrc_rx_hist_entry - return the n-th history entry after loss_start
  */
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHES 0/3]: DCCP patches for 2.6.25

2007-12-08 Thread Arnaldo Carvalho de Melo
Hi David,

Please consider pulling from:

master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25

Best Regards,

- Arnaldo

 b/net/dccp/ccids/ccid3.c  |   16 
 b/net/dccp/ccids/lib/loss_interval.c  |2 +-
 b/net/dccp/ccids/lib/packet_history.c |   13 ++---
 net/dccp/ccids/ccid3.c|4 +---
 net/dccp/ccids/lib/packet_history.c   |6 --
 5 files changed, 16 insertions(+), 25 deletions(-)
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC2][PATCH 7/7] [TFRC]: New rx history code

2007-12-06 Thread Arnaldo Carvalho de Melo
Em Thu, Dec 06, 2007 at 02:02:25PM +, Gerrit Renker escreveu:
 | The first six patches in this series are unmodified, so if you
 | are OK with them please send me your Signed-off-by.
 Patches [1/7], [2/7], and [6/7] already have a signed-off and there are
 no changes. Just acknowledged [3..5/7], will look at [7/7] now.

OK, please let me know if there are still any problems.

The removal of timestamp insertion in ccid3_hc_rx_insert_options will be
put in another cset.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/7] [CCID3]: The receiver of a half-connection does not set window counter values

2007-12-06 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

Only the sender sets window counters [RFC 4342, sections 5 and 8.1].

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index c95dca8..5ff5aab 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -733,7 +733,6 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, 
struct sk_buff *skb)
return 0;
 
hcrx = ccid3_hc_rx_sk(sk);
-   DCCP_SKB_CB(skb)-dccpd_ccval = hcrx-ccid3hcrx_ccval_last_counter;
 
if (dccp_packet_without_ack(skb))
return 0;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/7] [TFRC]: Make the rx history slab be global

2007-12-06 Thread Arnaldo Carvalho de Melo
This is in preparation for merging the new rx history code written by Gerrit 
Renker.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c  |   35 ++---
 net/dccp/ccids/lib/packet_history.c |   95 ++-
 net/dccp/ccids/lib/packet_history.h |   43 ++--
 3 files changed, 60 insertions(+), 113 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 5dea690..07920bb 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -49,8 +49,6 @@ static int ccid3_debug;
 #define ccid3_pr_debug(format, a...)
 #endif
 
-static struct dccp_rx_hist *ccid3_rx_hist;
-
 /*
  * Transmitter Half-Connection Routines
  */
@@ -807,9 +805,9 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
}
 
 detect_out:
-   dccp_rx_hist_add_packet(ccid3_rx_hist, hcrx-ccid3hcrx_hist,
-  hcrx-ccid3hcrx_li_hist, packet,
-  hcrx-ccid3hcrx_seqno_nonloss);
+   dccp_rx_hist_add_packet(hcrx-ccid3hcrx_hist,
+   hcrx-ccid3hcrx_li_hist, packet,
+   hcrx-ccid3hcrx_seqno_nonloss);
return loss;
 }
 
@@ -852,8 +850,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
return;
}
 
-   packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv-dccpor_ndp,
-   skb, GFP_ATOMIC);
+   packet = dccp_rx_hist_entry_new(opt_recv-dccpor_ndp, skb, GFP_ATOMIC);
if (unlikely(packet == NULL)) {
DCCP_WARN(%s(%p), Not enough mem to add rx packet 
  to history, consider it lost!\n, dccp_role(sk), sk);
@@ -936,7 +933,7 @@ static void ccid3_hc_rx_exit(struct sock *sk)
ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
 
/* Empty packet history */
-   dccp_rx_hist_purge(ccid3_rx_hist, hcrx-ccid3hcrx_hist);
+   dccp_rx_hist_purge(hcrx-ccid3hcrx_hist);
 
/* Empty loss interval history */
dccp_li_hist_purge(hcrx-ccid3hcrx_li_hist);
@@ -1013,33 +1010,13 @@ MODULE_PARM_DESC(ccid3_debug, Enable debug messages);
 
 static __init int ccid3_module_init(void)
 {
-   int rc = -ENOBUFS;
-
-   ccid3_rx_hist = dccp_rx_hist_new(ccid3);
-   if (ccid3_rx_hist == NULL)
-   goto out;
-
-   rc = ccid_register(ccid3);
-   if (rc != 0)
-   goto out_free_rx;
-out:
-   return rc;
-
-out_free_rx:
-   dccp_rx_hist_delete(ccid3_rx_hist);
-   ccid3_rx_hist = NULL;
-   goto out;
+   return ccid_register(ccid3);
 }
 module_init(ccid3_module_init);
 
 static __exit void ccid3_module_exit(void)
 {
ccid_unregister(ccid3);
-
-   if (ccid3_rx_hist != NULL) {
-   dccp_rx_hist_delete(ccid3_rx_hist);
-   ccid3_rx_hist = NULL;
-   }
 }
 module_exit(ccid3_module_exit);
 
diff --git a/net/dccp/ccids/lib/packet_history.c 
b/net/dccp/ccids/lib/packet_history.c
index b628714..e1ab853 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -114,48 +114,33 @@ EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
 /*
  * Receiver History Routines
  */
-struct dccp_rx_hist *dccp_rx_hist_new(const char *name)
+static struct kmem_cache *tfrc_rx_hist_slab;
+
+struct dccp_rx_hist_entry *dccp_rx_hist_entry_new(const u32 ndp,
+ const struct sk_buff *skb,
+ const gfp_t prio)
 {
-   struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
-   static const char dccp_rx_hist_mask[] = rx_hist_%s;
-   char *slab_name;
-
-   if (hist == NULL)
-   goto out;
-
-   slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1,
-   GFP_ATOMIC);
-   if (slab_name == NULL)
-   goto out_free_hist;
-
-   sprintf(slab_name, dccp_rx_hist_mask, name);
-   hist-dccprxh_slab = kmem_cache_create(slab_name,
-sizeof(struct dccp_rx_hist_entry),
-0, SLAB_HWCACHE_ALIGN, NULL);
-   if (hist-dccprxh_slab == NULL)
-   goto out_free_slab_name;
-out:
-   return hist;
-out_free_slab_name:
-   kfree(slab_name);
-out_free_hist:
-   kfree(hist);
-   hist = NULL;
-   goto out;
-}
+   struct dccp_rx_hist_entry *entry = kmem_cache_alloc(tfrc_rx_hist_slab,
+   prio);
 
-EXPORT_SYMBOL_GPL(dccp_rx_hist_new);
+   if (entry != NULL) {
+   const struct dccp_hdr *dh = dccp_hdr(skb);
 
-void dccp_rx_hist_delete(struct dccp_rx_hist *hist)
-{
-   const char* name = kmem_cache_name(hist-dccprxh_slab);
+   entry-dccphrx_seqno = DCCP_SKB_CB(skb)-dccpd_seq;
+   entry-dccphrx_ccval = dh

[PATCH 7/7] [TFRC]: New rx history code

2007-12-06 Thread Arnaldo Carvalho de Melo
Credit here goes to Gerrit Renker, that provided the initial implementation for
this new codebase.

I modified it just to try to make it closer to the existing API, renaming some
functions, add namespacing and fix one bug where the tfrc_rx_hist_alloc was not
freeing the allocated ring entries on the error path.

Original changeset comment from Gerrit:
  ---
This provides a new, self-contained and generic RX history service for TFRC
based protocols.

Details:
 * new data structure, initialisation and cleanup routines;
 * allocation of dccp_rx_hist entries local to packet_history.c,
   as a service exported by the dccp_tfrc_lib module.
 * interface to automatically track highest-received seqno;
 * receiver-based RTT estimation (needed for instance by RFC 3448, 6.3.1);
 * a generic function to test for `data packets' as per  RFC 4340, sec. 7.7.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c  |  288 --
 net/dccp/ccids/ccid3.h  |   14 +-
 net/dccp/ccids/lib/loss_interval.c  |   13 ++-
 net/dccp/ccids/lib/packet_history.c |  290 +--
 net/dccp/ccids/lib/packet_history.h |   83 +--
 5 files changed, 330 insertions(+), 358 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 5ff5aab..faacffa 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -641,6 +641,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const 
int optname, int len,
 /*
  * Receiver Half-Connection Routines
  */
+
+/* CCID3 feedback types */
+enum ccid3_fback_type {
+   CCID3_FBACK_NONE = 0,
+   CCID3_FBACK_INITIAL,
+   CCID3_FBACK_PERIODIC,
+   CCID3_FBACK_PARAM_CHANGE
+};
+
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
 {
@@ -667,59 +676,60 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
hcrx-ccid3hcrx_state = state;
 }
 
-static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len)
-{
-   if (likely(len  0))/* don't update on empty packets (e.g. ACKs) */
-   hcrx-ccid3hcrx_s = tfrc_ewma(hcrx-ccid3hcrx_s, len, 9);
-}
-
-static void ccid3_hc_rx_send_feedback(struct sock *sk)
+static void ccid3_hc_rx_send_feedback(struct sock *sk,
+ const struct sk_buff *skb,
+ enum ccid3_fback_type fbtype)
 {
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
-   struct tfrc_rx_hist_entry *packet;
ktime_t now;
-   suseconds_t delta;
+   s64 delta = 0;
 
ccid3_pr_debug(%s(%p) - entry \n, dccp_role(sk), sk);
 
+   if (unlikely(hcrx-ccid3hcrx_state == TFRC_RSTATE_TERM))
+   return;
+
now = ktime_get_real();
 
-   switch (hcrx-ccid3hcrx_state) {
-   case TFRC_RSTATE_NO_DATA:
+   switch (fbtype) {
+   case CCID3_FBACK_INITIAL:
hcrx-ccid3hcrx_x_recv = 0;
+   hcrx-ccid3hcrx_pinv   = ~0U;   /* see RFC 4342, 8.5 */
break;
-   case TFRC_RSTATE_DATA:
-   delta = ktime_us_delta(now,
-  hcrx-ccid3hcrx_tstamp_last_feedback);
-   DCCP_BUG_ON(delta  0);
-   hcrx-ccid3hcrx_x_recv =
-   scaled_div32(hcrx-ccid3hcrx_bytes_recv, delta);
+   case CCID3_FBACK_PARAM_CHANGE:
+   /*
+* When parameters change (new loss or p  p_prev), we do not
+* have a reliable estimate for R_m of [RFC 3448, 6.2] and so
+* need to  reuse the previous value of X_recv. However, when
+* X_recv was 0 (due to early loss), this would kill X down to
+* s/t_mbi (i.e. one packet in 64 seconds).
+* To avoid such drastic reduction, we approximate X_recv as
+* the number of bytes since last feedback.
+* This is a safe fallback, since X is bounded above by X_calc.
+*/
+   if (hcrx-ccid3hcrx_x_recv  0)
+   break;
+   /* fall through */
+   case CCID3_FBACK_PERIODIC:
+   delta = ktime_us_delta(now, 
hcrx-ccid3hcrx_tstamp_last_feedback);
+   if (delta = 0)
+   DCCP_BUG(delta (%ld) = 0, (long)delta);
+   else
+   hcrx-ccid3hcrx_x_recv =
+   scaled_div32(hcrx-ccid3hcrx_bytes_recv, delta);
break;
-   case TFRC_RSTATE_TERM:
-   DCCP_BUG(%s(%p) - Illegal state TERM, dccp_role(sk), sk);
+   default:
return;
}
 
-   packet = tfrc_rx_hist_find_data_packet(hcrx-ccid3hcrx_hist);
-   if (unlikely(packet == NULL)) {
-   DCCP_WARN(%s(%p), no data

[PATCHES 0/7]: DCCP patches for 2.6.25

2007-12-06 Thread Arnaldo Carvalho de Melo
Hi David,

Please consider pulling from:

master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25

Best Regards,

- Arnaldo

 b/net/dccp/ccids/Kconfig  |   13 
 b/net/dccp/ccids/ccid3.c  |   35 --
 b/net/dccp/ccids/ccid3.h  |   14 
 b/net/dccp/ccids/lib/Makefile |2 
 b/net/dccp/ccids/lib/loss_interval.c  |   14 
 b/net/dccp/ccids/lib/packet_history.c |   27 -
 b/net/dccp/ccids/lib/packet_history.h |3 
 b/net/dccp/ccids/lib/tfrc.c   |   48 +++
 b/net/dccp/ccids/lib/tfrc.h   |   18 -
 b/net/dccp/dccp.h |   13 
 net/dccp/ccids/ccid3.c|  322 --
 net/dccp/ccids/lib/loss_interval.c|   13 
 net/dccp/ccids/lib/packet_history.c   |  496 +++---
 net/dccp/ccids/lib/packet_history.h   |  177 
 14 files changed, 579 insertions(+), 616 deletions(-)
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/7] [TFRC]: Rename tfrc_tx_hist to tfrc_tx_hist_slab, for consistency

2007-12-06 Thread Arnaldo Carvalho de Melo
Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/lib/packet_history.c |   20 ++--
 1 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/dccp/ccids/lib/packet_history.c 
b/net/dccp/ccids/lib/packet_history.c
index 1d4d6ee..b628714 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -53,7 +53,7 @@ struct tfrc_tx_hist_entry {
 /*
  * Transmitter History Routines
  */
-static struct kmem_cache *tfrc_tx_hist;
+static struct kmem_cache *tfrc_tx_hist_slab;
 
 static struct tfrc_tx_hist_entry *
tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
@@ -66,7 +66,7 @@ static struct tfrc_tx_hist_entry *
 
 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
-   struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist, 
gfp_any());
+   struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, 
gfp_any());
 
if (entry == NULL)
return -ENOBUFS;
@@ -85,7 +85,7 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
while (head != NULL) {
struct tfrc_tx_hist_entry *next = head-next;
 
-   kmem_cache_free(tfrc_tx_hist, head);
+   kmem_cache_free(tfrc_tx_hist_slab, head);
head = next;
}
 
@@ -278,17 +278,17 @@ EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
 
 __init int packet_history_init(void)
 {
-   tfrc_tx_hist = kmem_cache_create(tfrc_tx_hist,
-sizeof(struct tfrc_tx_hist_entry), 0,
-SLAB_HWCACHE_ALIGN, NULL);
+   tfrc_tx_hist_slab = kmem_cache_create(tfrc_tx_hist,
+ sizeof(struct 
tfrc_tx_hist_entry), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
 
-   return tfrc_tx_hist == NULL ? -ENOBUFS : 0;
+   return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
 }
 
 void packet_history_exit(void)
 {
-   if (tfrc_tx_hist != NULL) {
-   kmem_cache_destroy(tfrc_tx_hist);
-   tfrc_tx_hist = NULL;
+   if (tfrc_tx_hist_slab != NULL) {
+   kmem_cache_destroy(tfrc_tx_hist_slab);
+   tfrc_tx_hist_slab = NULL;
}
 }
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/7] [TFRC]: Provide central source file and debug facility

2007-12-06 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This patch changes the tfrc_lib module in the following manner:

 (1) a dedicated tfrc source file to call the packet history 
 loss interval init/exit functions.
 (2) a dedicated tfrc_pr_debug macro with toggle switch `tfrc_debug'.

Commiter note: renamed tfrc_module.c to tfrc.c, and made CONFIG_IP_DCCP_CCID3
select IP_DCCP_TFRC_LIB.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/Kconfig  |   13 ++---
 net/dccp/ccids/lib/Makefile |2 +-
 net/dccp/ccids/lib/packet_history.c |   27 ++-
 net/dccp/ccids/lib/packet_history.h |3 +-
 net/dccp/ccids/lib/tfrc.c   |   48 +++
 net/dccp/ccids/lib/tfrc.h   |   17 +---
 6 files changed, 75 insertions(+), 35 deletions(-)
 create mode 100644 net/dccp/ccids/lib/tfrc.c

diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 3d7d867..1227594 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -38,6 +38,7 @@ config IP_DCCP_CCID2_DEBUG
 config IP_DCCP_CCID3
tristate CCID3 (TCP-Friendly) (EXPERIMENTAL)
def_tristate IP_DCCP
+   select IP_DCCP_TFRC_LIB
---help---
  CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
  rate-controlled congestion control mechanism.  TFRC is designed to
@@ -63,10 +64,6 @@ config IP_DCCP_CCID3
 
  If in doubt, say M.
 
-config IP_DCCP_TFRC_LIB
-   depends on IP_DCCP_CCID3
-   def_tristate IP_DCCP_CCID3
-
 config IP_DCCP_CCID3_DEBUG
  bool CCID3 debugging messages
  depends on IP_DCCP_CCID3
@@ -110,5 +107,13 @@ config IP_DCCP_CCID3_RTO
is serious network congestion: experimenting with larger values 
should
therefore not be performed on WANs.
 
+config IP_DCCP_TFRC_LIB
+   tristate
+   default n
+
+config IP_DCCP_TFRC_DEBUG
+   bool
+   depends on IP_DCCP_TFRC_LIB
+   default y if IP_DCCP_CCID3_DEBUG
 
 endmenu
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
index 5f940a6..68c93e3 100644
--- a/net/dccp/ccids/lib/Makefile
+++ b/net/dccp/ccids/lib/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
 
-dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o
+dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o
diff --git a/net/dccp/ccids/lib/packet_history.c 
b/net/dccp/ccids/lib/packet_history.c
index 4805de9..1d4d6ee 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -35,7 +35,6 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include linux/module.h
 #include linux/string.h
 #include packet_history.h
 
@@ -277,39 +276,19 @@ void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct 
list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
 
-extern int __init dccp_li_init(void);
-extern void dccp_li_exit(void);
-
-static __init int packet_history_init(void)
+__init int packet_history_init(void)
 {
-   if (dccp_li_init() != 0)
-   goto out;
-
tfrc_tx_hist = kmem_cache_create(tfrc_tx_hist,
 sizeof(struct tfrc_tx_hist_entry), 0,
 SLAB_HWCACHE_ALIGN, NULL);
-   if (tfrc_tx_hist == NULL)
-   goto out_li_exit;
 
-   return 0;
-out_li_exit:
-   dccp_li_exit();
-out:
-   return -ENOBUFS;
+   return tfrc_tx_hist == NULL ? -ENOBUFS : 0;
 }
-module_init(packet_history_init);
 
-static __exit void packet_history_exit(void)
+void packet_history_exit(void)
 {
if (tfrc_tx_hist != NULL) {
kmem_cache_destroy(tfrc_tx_hist);
tfrc_tx_hist = NULL;
}
-   dccp_li_exit();
 }
-module_exit(packet_history_exit);
-
-MODULE_AUTHOR(Ian McDonald [EMAIL PROTECTED], 
- Arnaldo Carvalho de Melo [EMAIL PROTECTED]);
-MODULE_DESCRIPTION(DCCP TFRC library);
-MODULE_LICENSE(GPL);
diff --git a/net/dccp/ccids/lib/packet_history.h 
b/net/dccp/ccids/lib/packet_history.h
index 0670f46..9a2642e 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -39,8 +39,7 @@
 #include linux/ktime.h
 #include linux/list.h
 #include linux/slab.h
-
-#include ../../dccp.h
+#include tfrc.h
 
 /* Number of later packets received before one is considered lost */
 #define TFRC_RECV_NUM_LATE_LOSS 3
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
new file mode 100644
index 000..3a7a183
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -0,0 +1,48 @@
+/*
+ * TFRC: main module holding the pieces of the TFRC library together
+ *
+ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
+ * Copyright (c) 2007 Arnaldo Carvalho de Melo [EMAIL PROTECTED]
+ */
+#include linux

Re: [PATCH 7/7] [TFRC] New rx history code

2007-12-05 Thread Arnaldo Carvalho de Melo
Em Wed, Dec 05, 2007 at 09:35:30AM +, Gerrit Renker escreveu:
 Quoting Arnaldo:
 | Em Tue, Dec 04, 2007 at 06:55:17AM +, Gerrit Renker escreveu:
 |  NAK. You have changed the control flow of the algorithm and the underlying
 |  data structure. Originally it had been an array of pointers, and this had
 |  been a design decision right from the first submissions in March. From I
 |  of 
 http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid3_packet_reception/loss_detection/loss_detection_algorithm_notes.txt
 |   
 |   1. 'ring' is an array of pointers rather than a contiguous area in
 |   memory. The reason is that, when having to swap adjacent entries
 |   to sort the history, it is easier to swap pointers than to copy
 |   (heavy-weight) history-entry data structs.
 |  
 |  But in your algorithm it becomes a normal linear array.
 |  
 |  As a consequence all subsequent code needs to be rewritten to use
 |  copying instead of swapping pointers. And there is a lot of that, since
 |  the loss detection code makes heavy use of swapping entries in order to
 |  cope with reordered and/or delayed packets.
 | 
 | So lets not do that, the decision was made based on the RX history patch
 | alone, where, as pointed out, no swapping occurs.
 |  
 |  This is not what I would call entirely equivalent as you claim. Your
 |  algorithm is fundamentally different, the control flow is not the same,
 |  nor are the underlying data structures.
 | 
 | Its is equivalent up to what is in the tree, thank you for point out
 | that in subsequent patches it will be used.
 | 
 | Had this design decision been made explicit in the changeset comment
 | that introduces the data structure I wouldn't have tried to reduce the
 | number of allocations.
 | 
 | And you even said that it was a good decision when first reacting to
 | this change, which I saw as positive feedback for the RFC I sent.
 | 
 That was my fault. Your solution looked much better to me but even I had 
 forgotten
 that the algorithm is based on an array of pointers. When I finally sat down 

And you wrote that code, I acted on looking the code together with
reading the changeset comment, to me it also looked much better to do
that, but as there were many changes, I sent an [RFC] message. It served
its purpose, as you after two iterations realised it was in fact not a
good idea as later an array of pointers is required. I should have taken
that swap routine as the definitive hint that indeed it was needed.

 and looked through the patch set I found the original notes from March and
 saw that using a linear array instead of an array of pointers will require 
 quite
 a few changes and means changing the algorithm. I then thought through 
 whether there
 could be any advantage in using a linear array as in this patch, but could 
 find none.
 In the end this went back to the same questions and issues that were tackled 
 between
 February and March this year. I wish we could have had this dicussion then; 
 it would
 have made this email unnecessary.

That is why it is important that the changeset comment collects these
scattered notes and discussions. Think about in some years from now we
will be left with a situation where we would have to look at many places
to understando some aspects of what is in the code. While this happens
and we have google for that I think that since you keep such detailed
notes it is not a problem to get them in the changesets.
 
 I thank you for your replay and I am sorry if you took offense at my perhaps 
 a little strong
 reaction, here is the essence what I tried to convey but what maybe did not 
 succeed in conveying:
 
  * I am absolutely ok with you making changes to variable names, namespaces, 
 file organisation,
overall arrangement etc (as per previous email). You have experience and 
 this will often be a 
benefit. For instance, you combined tfrc_entry_from_skb() with 
 tfrc_rx_hist_update() to give
a new function, tfrc_rx_hist_add_packet(). This is ok since 
 entry_from_skb() is not otherwise
used (and I only found this out when reading your patch). (On the other 
 hand, since this is a
4-element ring buffer, it is no real adding, the same entry will 
 frequently be overwritten,
this is why it was initially called hist_update().)

From the RX history API user perspective (CCID3 right now, others may
come) it is adding a packet to the history. In the past it went to a
list, now we have a ring and don't keep more than TFRC_NDUPACK + 1
entries, but this is an internal detail that users don't need to know.
 
  * I am also not so much concerned about credit. As long as the (changed) end 
 result is as
least as good as or hopefully better than the submitted input, the author 
 name is a side
issue; so I do think that your approach is sound and fair. The only place 
 where credits
are needed is for the SatSix project (www.ist-satsix.org) which funded 
 this work. This is
why some of the 

Re: [PATCH v2 0/3][BUG-FIX]: Test tree updates and bug fixes

2007-12-05 Thread Arnaldo Carvalho de Melo
Em Wed, Dec 05, 2007 at 02:53:09PM +, Gerrit Renker escreveu:
 | Thanks, I folded this into the reorganized RX history handling patch,
 | together with reverting ccid3_hc_rx_packet_recv to very close to your
 | original patch, with this changes:
 | 
 | 1. no need to calculate the payload size for non data packets as this
 |value won't be used.
 | 2. Initialize hcrx-ccid3hcrx_bytes_recv with the payload size when
 |hcrx-ccid3hcrx_state == TFRC_RSTATE_NO_DATA.
 | 3. tfrc_rx_hist_duplicate() is only called when ccid3hcrx_state !=
 |TFRC_RSTATE_NO_DATA, so it doesn't need to goto the done_receiving
 |label (that was removed as this was its only use) as do_feedback
 |would always be CCID3_FBACK_NONE and so the test would always fail
 |and no feedback would be sent, so just return right there.
 | 
 | Now it reads:
 | 
 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 | {
 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 | enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
 | const u32 ndp = dccp_sk(sk)-dccps_options_received.dccpor_ndp;
 | const bool is_data_packet = dccp_data_packet(skb);
 | 
 | if (unlikely(hcrx-ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
 | if (is_data_packet) {
 | const u32 payload = skb-len - 
 dccp_hdr(skb)-dccph_doff * 4;
 | do_feedback = FBACK_INITIAL;
 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
 | hcrx-ccid3hcrx_s =
 | hcrx-ccid3hcrx_bytes_recv = payload_size;
 
   == Please see other email regarding bytes_recv, but I think you already 
 got that.
   Maybe one could then write
   hcrx-ccid3hcrx_s = skb-len - dccp_hdr(skb)-dccph_doff * 4;

OK, I got that.
  
 | }
 | goto update_records;
 | }
 | 
 | if (tfrc_rx_hist_duplicate(hcrx-ccid3hcrx_hist, skb))
 | return; /* done receiving */
 | 
 | if (is_data_packet) {
 | const u32 payload = skb-len - dccp_hdr(skb)-dccph_doff * 4;
 | /*
 |  * Update moving-average of s and the sum of received payload 
 bytes
 |  */
 | hcrx-ccid3hcrx_s = tfrc_ewma(hcrx-ccid3hcrx_s, payload_size, 
 9);
 | hcrx-ccid3hcrx_bytes_recv += payload_size;
 | }
 | 
 | /*
 |  * Handle pending losses and otherwise check for new loss
 |  */
 | if (tfrc_rx_hist_new_loss_indicated(hcrx-ccid3hcrx_hist, skb, ndp))
 | goto update_records;
 | 
 | /*
 |  * Handle data packets: RTT sampling and monitoring p
 |  */
 | if (unlikely(!is_data_packet))
 | goto update_records;
 | 
 | if (list_empty(hcrx-ccid3hcrx_li_hist)) {  /* no loss so far: p = 0 */
 | const u32 sample = 
 tfrc_rx_hist_sample_rtt(hcrx-ccid3hcrx_hist, skb);
 == If you like, you could add the original comment here that p=0 if no   
 loss occured, i.e.
   /*
* Empty loss history: no loss so far, hence p stays 0.
* Sample RTT values, since an RTT estimate is required for the
* computation of p when the first loss occurs; RFC 3448, 6.3.1.
*/

done

 | if (sample != 0)
 | hcrx-ccid3hcrx_rtt = tfrc_ewma(hcrx-ccid3hcrx_rtt, 
 sample, 9);
 | }
 | 
 | /*
 |  * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
 |  */
 | if (SUB16(dccp_hdr(skb)-dccph_ccval, hcrx-ccid3hcrx_last_counter)  3)
 | do_feedback = CCID3_FBACK_PERIODIC;
 | 
 | update_records: 
 | tfrc_rx_hist_add_packet(hcrx-ccid3hcrx_hist, skb, ndp);
 | 
 ==  here a jump label is missing. It is not needed by this patch and
  above you have replaced it with a return + comment, but it is needed in 
 a later
  patch: when a new loss event occurs, the control jumps to 
 `done_receiving' and
  sends a feedback packet with type FBACK_PARAM_CHANGE.

OK, I was wondering that the user for FBACK_PARAM_CHANGE in
ccid3_hc_rx_send_feedback was in another patch.

 done_receiving:

Ok, we can add the jump label when we make use of it

 | if (do_feedback)
 | ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
 | }
 | 
 
 | Now to some questions and please bear with me as I haven't got to the
 | patches after this:
 | 
 | tfrc_rx_hist-loss_count as of now is a boolean, my understanding is
 | that you are counting loss events, i.e. it doesn't matter in:
 |
 It is not a boolean, but uses a hidden trick which maybe should be commented:
  * here and in the TCP world NDUPACK = 3
  * hence the bitfield size for loss_count is 2 bits, which can express
at most 3 = NDUPACK (that is why it is declared as loss_count:2)
  * the trick is that when the loss count increases beyond 3, it automatically 
cycles back to 0 (although the code does not rely on that features
and does this 

Re: [PATCH v2 0/3][BUG-FIX]: Test tree updates and bug fixes

2007-12-05 Thread Arnaldo Carvalho de Melo
Em Wed, Dec 05, 2007 at 01:55:11PM +, Gerrit Renker escreveu:
 |  @@ -788,8 +782,8 @@ static void ccid3_hc_rx_packet_recv(stru
 |if (unlikely(hcrx-ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
 |if (is_data_packet) {
 |do_feedback = FBACK_INITIAL;
 |  + hcrx-ccid3hcrx_s = payload_size;
 |ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
 |  - ccid3_hc_rx_update_s(hcrx, payload_size);
 | 
 | We have to set ccid3hcrx_bytes_recv to the payload_size here too, I'm
 | fixing this on the reworked patch that introduces the RX history.
 | 
 I almost did the same error again by wanting to agree too prematurely.
 
 But updating ccid3hcrx_bytes_recv is in fact not needed here and if it
 would be done it would not have a useable effect. The reason is that the
 first data packet will trigger the initial feedback; and in the initial
 feedback packet X_recv (which is ccid3hcrx_bytes_recv / the_time_spent)
 is set to 0 (RFC 3448, 6.3).
 
 For this reason, updating bytes_recv for the first data packet is also not
 in the flowchart on 
 http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ccid3_packet_reception/

OK, I will add a comment on the code stating why it is not needed so
that new people don't commit the same mistake again.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/3][BUG-FIX]: Test tree updates and bug fixes

2007-12-05 Thread Arnaldo Carvalho de Melo
Em Wed, Dec 05, 2007 at 11:19:44AM +, Gerrit Renker escreveu:
 This revision fixes a bug present in the per-socket allocation of RX history 
 entries; identification of this bug is thanks to Arnaldo Carvalho de Melo.
 
 The bug was in not deallocating history entries when the allocation of 
 one array element failed. The solution in this revised patch set is the
 original one written by Arnaldo.

Ok, this one I posted with some other comments and explanations about
changes other than namespacing, renaming of some functions and removal
of functions not used in this patch.
 
- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/3][BUG-FIX]: Test tree updates and bug fixes

2007-12-05 Thread Arnaldo Carvalho de Melo
Em Wed, Dec 05, 2007 at 11:19:45AM +, Gerrit Renker escreveu:
 This fixes a problem in the initial revision of the patch: The loss interval
 history was not de-allocated when the initialisation of the packet history
 failed.  The identification of this problem is also thanks due to Arnaldo.
 
 The interdiff to the previous revision is:
 
   --- b/net/dccp/ccids/lib/tfrc_module.c
   +++ b/net/dccp/ccids/lib/tfrc_module.c
   @@ -26,7 +26,12 @@

   if (rc == 0)
   rc = packet_history_init();
   +   if (rc == 0)
   +   goto out;

   +out_free_loss_intervals:
   +   dccp_li_exit();
   +out:
   return rc;
}
 

Ok, this one is kept on the series I have pending submission.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCHES 0/7]: Reorganization of RX history patches

2007-12-05 Thread Arnaldo Carvalho de Melo
Note: removed Ingo from the CC list, I had added it first just because
he advocated reducing the number of mailing lists, so I wanted him to
know that we're trying to do that.

Em Wed, Dec 05, 2007 at 10:27:36AM +, Gerrit Renker escreveu:
 Today being Wednesday, below is some feedback after working through the patch 
 set.
 Hope this is helpful. 
 
 Patch #1:
 -
   Several new good points are introduced:
   - IP_DCCP_TFRC_DEBUG is made dependent on IP_DCCP_TFRC_DEBUG which is useful
   - the select from CONFIG_IP_DCCP_CCID3 = CONFIG_DCCP_TFRC_LIB
   - the cleanup action in tfrc_module_init() (when packet_history_init() 
 fails)
 was previously missing, this is a good catch.
   Also a note: tfrc_pr_debug() is not currently used (but may be later should 
 the 
code common to both CCID3 and CCID4 be shared).

OK
  
 Patches #2/#6:
 --
   Separated from main patch, no problems (were initially submitted in this 
 format).
   I wonder whether switching back to smaller patch sizes is better?

Patches that do one thing that is logically separated from others are
preferred, as its analisys is made faster.

Patches that rewrite an existing functionality are done best when the
functions being replaced keep as much as possible the same name. For
instance, in the new RX handling code we need to alloc the RX hist
internal structures, in the previous implementation we also needed to do
that, it also needs to purge entries, as the old one needed. Previous
one was a linked list, the new one is a ring. As we go we can and should
add new APIs when needed, but trying to keep an API so that if in the
future we decide to rework the internal structures, this can be done in
a plugin fashion, without changing too much its users (CCIDs in this
case), so that we can get back to the old one with minor disruption to
the users if needed. Sometimes we manage to do that, some times we need
to improve upon the current API, but the goal remains the same.
 
 Patch #3: 
 -
   Renames s/tfrc_tx_hist/tfrc_tx_hist_slab/g; no problems.

This was for consistency with the other slabs in DCCP:

[EMAIL PROTECTED] net-2.6.25]$ find net/dccp/ -name *.c | xargs grep 'struct 
kmem_cache'
net/dccp/ccids/lib/packet_history.c:static struct kmem_cache *tfrc_tx_hist_slab;
net/dccp/ccids/lib/packet_history.c:static struct kmem_cache *tfrc_rx_hist_slab;
net/dccp/ccids/lib/loss_interval.c:static struct kmem_cache *dccp_li_cachep 
__read_mostly;
net/dccp/ackvec.c:static struct kmem_cache *dccp_ackvec_slab;
net/dccp/ackvec.c:static struct kmem_cache *dccp_ackvec_record_slab;
net/dccp/ccid.c:struct kmem_cache *slab;
net/dccp/ccid.c:static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
[EMAIL PROTECTED] net-2.6.25]$

 Patch #4:
 -
   packet_history_init() initialises both RX and TX history and is later 
 called by the module_init()
   function in net/dccp/ccids/lib/tfrc.c. Just a suggestion, it is possible to 
 simplify this further,
   by doing all the initialisation / cleanup in tfrc.c:
 int __init {rx,tx}_packet_history_init()
 {
   tfrc_{rx,tx}_hist_slab = kmem_cache_create(tfrc_{rx,tx}_hist, 
 ...);
   return tfrc_{rx,tx}_hist_slab == NULL ? - ENOBUFS : 0;
 }
   and then call these successively in tfrc_module_init().

Idea here was to have each C source file to have a init module. Perhaps
we should try to break packet_history.c into tx_packet_history and
rx_packet_history.c. We can do that later to try to meet the goal of
being able to see what is being replaced.
 
 Patch #5:
 -
   The naming scheme introduced here is 
 s/dccp_rx/tfrc_rx/g; 
 s/dccphrx/tfrchrx/g;
   I wonder, while at it, would it be possible to extend this and extend this 
 to other parts
   of the code? Basically this is the same discussion as earlier on [EMAIL 
 PROTECTED] with Leandro,
   who first came up with this naming scheme. There the same question came up 
 and the result
   of the discussion was that a prefix of  `tfrchrx' is cryptic; if something 
 simpler is 
   possible then it would be great.

As we did with the ackvecs, will do that later.
 
 Patch #7:
 -
  * ccid3_hc_rx_detect_loss() can be fully removed since no other code 
 references it any
further.

I left it to try have the diff not mixing up removal of its
implementation with the implementation of the function just after it,
ccid3_hx_rx_packet_recv. Will remove it later.

  * bytes_recv also counts the payload_size's of non-data packets, which is 
 wrong (it should only
sum up the sum of bytes transferred as data packets)

As said to you in private message I'll get back to the way you wrote, as
you mentioned that upcoming patches will make good use of that.

I'll try to restrain me to not do too many changes.
 
  * loss handling is not correctly taken care of: unlike in the other part, 
 both data and non-data
packets are used to detect loss (this is 

Re: [PATCH v2 0/3][BUG-FIX]: Test tree updates and bug fixes

2007-12-05 Thread Arnaldo Carvalho de Melo
Em Wed, Dec 05, 2007 at 11:19:46AM +, Gerrit Renker escreveu:
 This patch removes the following redundancies:
   * ccid3_hc_rx_update_s() is only called for data packets (that is what it 
 should be called for);
   * each call to ccid3_hc_rx_update_s() is wrapped inside a if 
 (is_data_packet) test';
   * therefore testing each time if len  0 is redundant (pointed out by 
 Arnaldo);
   * no other code calls this function, hence the inline function can go.
 
 Also replaced the first call to updating s with direct assignment of 
 `payload_size'; this has also
 been pointed out by Arnaldo.

Thanks, I folded this into the reorganized RX history handling patch,
together with reverting ccid3_hc_rx_packet_recv to very close to your
original patch, with this changes:

1. no need to calculate the payload size for non data packets as this
   value won't be used.
2. Initialize hcrx-ccid3hcrx_bytes_recv with the payload size when
   hcrx-ccid3hcrx_state == TFRC_RSTATE_NO_DATA.
3. tfrc_rx_hist_duplicate() is only called when ccid3hcrx_state !=
   TFRC_RSTATE_NO_DATA, so it doesn't need to goto the done_receiving
   label (that was removed as this was its only use) as do_feedback
   would always be CCID3_FBACK_NONE and so the test would always fail
   and no feedback would be sent, so just return right there.

Now it reads:

static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
const u32 ndp = dccp_sk(sk)-dccps_options_received.dccpor_ndp;
const bool is_data_packet = dccp_data_packet(skb);

if (unlikely(hcrx-ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
if (is_data_packet) {
const u32 payload = skb-len - 
dccp_hdr(skb)-dccph_doff * 4;
do_feedback = FBACK_INITIAL;
ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
hcrx-ccid3hcrx_s =
hcrx-ccid3hcrx_bytes_recv = payload_size;
}
goto update_records;
}

if (tfrc_rx_hist_duplicate(hcrx-ccid3hcrx_hist, skb))
return; /* done receiving */

if (is_data_packet) {
const u32 payload = skb-len - dccp_hdr(skb)-dccph_doff * 4;
/*
 * Update moving-average of s and the sum of received payload 
bytes
 */
hcrx-ccid3hcrx_s = tfrc_ewma(hcrx-ccid3hcrx_s, payload_size, 
9);
hcrx-ccid3hcrx_bytes_recv += payload_size;
}

/*
 * Handle pending losses and otherwise check for new loss
 */
if (tfrc_rx_hist_new_loss_indicated(hcrx-ccid3hcrx_hist, skb, ndp))
goto update_records;

/*
 * Handle data packets: RTT sampling and monitoring p
 */
if (unlikely(!is_data_packet))
goto update_records;

if (list_empty(hcrx-ccid3hcrx_li_hist)) {  /* no loss so far: p = 0 */
const u32 sample = 
tfrc_rx_hist_sample_rtt(hcrx-ccid3hcrx_hist, skb);
if (sample != 0)
hcrx-ccid3hcrx_rtt = tfrc_ewma(hcrx-ccid3hcrx_rtt, 
sample, 9);
}

/*
 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
 */
if (SUB16(dccp_hdr(skb)-dccph_ccval, hcrx-ccid3hcrx_last_counter)  3)
do_feedback = CCID3_FBACK_PERIODIC;

update_records: 
tfrc_rx_hist_add_packet(hcrx-ccid3hcrx_hist, skb, ndp);

if (do_feedback)
ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
}

Now to some questions and please bear with me as I haven't got to the
patches after this:

tfrc_rx_hist-loss_count as of now is a boolean, my understanding is
that you are counting loss events, i.e. it doesn't matter in:

/* any data packets missing between last reception and skb ? */
int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
const struct sk_buff *skb, u32 ndp)
{
int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)-tfrchrx_seqno,
 DCCP_SKB_CB(skb)-dccpd_seq);

if (delta  1  ndp  delta)
tfrc_rx_hist_loss_indicated(h);

return tfrc_rx_hist_loss_pending(h);
}

if (delta - ndp) is  1, i.e. tfrc_rx_hist-loss_count only indicates
that there was loss. But then in other parts of the code it assumes it
can be more than 1:

/**
 * tfrc_rx_hist  -  RX history structure for TFRC-based protocols
 *
 * @ring:   Packet history for RTT sampling and loss detection
 * @loss_count: Number of entries in circular history
 * @loss_start: Movable index (for loss detection)
 * @rtt_sample_prev:Used during RTT sampling, points to candidate entry
 */
struct tfrc_rx_hist {
struct tfrc_rx_hist_entry 

Re: [RFC][PATCHES 0/7]: Reorganization of RX history patches

2007-12-03 Thread Arnaldo Carvalho de Melo
Em Mon, Dec 03, 2007 at 08:35:12AM +, Gerrit Renker escreveu:
 Hi Arnaldo,
 
 hank you for going through this. I have just backported your recent patches 
 of 2.6.25
 to the DCCP/CCID4/Faster Restart test tree at 
   git://eden-feed.erg.abdn.ac.uk/dccp_exp {dccp,ccid4,dccp_fr}
 as per subsequent message.
 |  do, so please consider moving DCCP discussion to [EMAIL PROTECTED],
 |  where lots of smart networking folks are present and can help our 
 efforts
 |  on turning RFCs to code.
 Are you suggesting using netdev exclusively or in addition to [EMAIL 
 PROTECTED]

Well, since at least one person that has contributed significantly in
the past has said he can't cope with traffic on netdev, we can CC
[EMAIL PROTECTED]

 
 | Please take a look at this patch series where I reorganized your work 
 on the new
 | TFRC rx history handling code. I'll wait for your considerations and then 
 do as many
 | interactions as reasonable to get your work merged.
 | 
 | It should be completely equivalent, plus some fixes and optimizations, 
 such as:
 It will be necessary to address these points one-by-one. Before diving into 
 making
 fixes and `optimisations', have you tested your code? The patches you are 
 referring to

I have performed basic tests, and when doing so noticed a bug in
inet_diag, that I commented with Herbert Xu and he has since provided a
fix.

 have been posted and re-posted for a period of over 9 months on [EMAIL 
 PROTECTED], and

Being posted and re-posted does not guarantee that the patch is OK or
that is in a form that is acceptable to all tree maintainers. DaveM is
subscribed to [EMAIL PROTECTED] and could have picked this if he had the time to
do the review. I didn't, but now I'm trying to spend my time on
reviewing your patches and in the process doing modifications I find
appropriate while trying hard not to introduce bugs in your hard work
to get it merged.

 there are regression tests which show that this code improves on the existing 
 Linux
 implementation. These are labelled as `test tree' on 
   http://www.linux-foundation.org/en/Net:DCCP_Testing#Regression_testing
 So if you are making changes to the code, I would like to ask if you have run 
 similar
 regression tests, to avoid having to step back later.

Fair enough, I will do that before asking Herbert or Dave to pull from
my tree.
 
 | . The code that allocates the RX ring deals with failures when one of the 
 entries in
 |   the ring buffer is not successfully allocated, the original code was 
 leaking the
 |   successfully allocated entries.

Sorry for not point out exactly this, here it goes:

Your original patch:

+int tfrc_rx_hist_init(struct tfrc_rx_hist *h)
+{
+   int i;
+
+   for (i = 0; i = NDUPACK; i++) {
+   h-ring[i] = kmem_cache_alloc(tfrc_rxh_cache, GFP_ATOMIC);
+   if (h-ring[i] == NULL)
+   return 1;
+   }
+   h-loss_count = 0;
+   h-loss_start = 0;
+   return 0;
+}
+EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);

Then, in ccid3_hc_rx_init you do:

static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
{
struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);

ccid3_pr_debug(entry\n);

hcrx-ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
tfrc_lh_init(hcrx-ccid3hcrx_li_hist);
return tfrc_rx_hist_init(hcrx-ccid3hcrx_hist);
}

So if tfrc_rx_hist_init fail to allocate, say, the last entry in the
ring, it will return 1, and from looking at how you initialize
h-loss_{count,start} to zero I assumed that the whole tfrc_rx_hist
is not zeroed when tfrc_rx_hist_init is called, so one can also assume
that the ring entries are not initialized to NULL and that if the
error recovery is to assume that later on tfrc_rx_hist_cleanup is called
we would not have a leak, but an OOPS when tfrc_rx_hist_cleanup tries
to call kfree_cache_free on the uninitialized ring entries.

But if you look at ccid_new(), that calls ccid3_hc_rx_init(), you'll see
that when the ccid rx or hx routine fails, it just frees the
struct ccid with the area where h-ring is, so, what was not cleaned up
by the ccid init routine is effectively forgot, leaked.

I first did the cleanup at tfrc_rx_hist_init (that I renamed to
tfrc_rx_hist_alloc, since it doesn't just initializes things, but
allocates entries from slab), but then I just made the rx history slab
have arrays of tfrc_rx_hist_entry objects, not individual ones as your
code always allocates them as arrays.

 | . We do just one allocation for the ring buffer, as the number of entries 
 is fixed we
 |   should just do one allocation and not TFRC_NDUPACK times.
 Will look at the first point in the patch; with regard to the second point I 
 agree, this
 will make the code simpler, which is good. 

good

 | . I haven't checked if all the code was commited, as I tried to introduce 
 just what was
 |   immediatelly used, probably we'll need to do some changes when working on 
 the merge
 |   of 

Re: [RFC][PATCHES 0/7]: Reorganization of RX history patches

2007-12-03 Thread Arnaldo Carvalho de Melo
Em Mon, Dec 03, 2007 at 01:49:47PM +, Gerrit Renker escreveu:
 |  Are you suggesting using netdev exclusively or in addition to [EMAIL 
 PROTECTED]
 | 
 | Well, since at least one person that has contributed significantly in
 | the past has said he can't cope with traffic on netdev, we can CC
 | dccp@vger.kernel.org
 I have a similar problem with the traffic but agree and will copy as well.
 
 
 |  have been posted and re-posted for a period of over 9 months on [EMAIL 
 PROTECTED], and
 | 
 | Being posted and re-posted does not guarantee that the patch is OK or
 | that is in a form that is acceptable to all tree maintainers.

 With the first point I couldn't agree more, but this is not really what I 
 meant - the point
 was that despite posting and re-posting there was often silence. And now 
 there is feedback,
 in form of a patchset made by you; and all that I am asking for is just to be 
 given the time to
 look that through. Last time a RFC patch appeared at 3pm and was checked in 
 the same evening
 (only found out next morning).

I was too optimistic about that one, feeling that it was safe, sorry
about that, will avoid doing that in the future.

 With your experience and long background as a maintainer maybe you expect 
 quicker turn-around
 times, but I also had waited patiently until you had had a chance to review 
 the stuff I sent.

Agreed, my bad, will be more patient with my side as you've been with
yours :-)

 |  | . The code that allocates the RX ring deals with failures when one of 
 the entries in
 |  |   the ring buffer is not successfully allocated, the original code was 
 leaking the
 |  |   successfully allocated entries.
 | 
 | Sorry for not point out exactly this, here it goes:
 | 
 | Your original patch:
 | 
 | +int tfrc_rx_hist_init(struct tfrc_rx_hist *h)
 | +{
 | +   int i;
 | +
 | +   for (i = 0; i = NDUPACK; i++) {
 | +   h-ring[i] = kmem_cache_alloc(tfrc_rxh_cache, GFP_ATOMIC);
 | +   if (h-ring[i] == NULL)
 | +   return 1;
 | +   }
 | +   h-loss_count = 0;
 | +   h-loss_start = 0;
 | +   return 0;
 | +}
 | +EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
 | 
 I expected this and it actually was very clear from your original message. I 
 fully back up
 your solution in this point, see below. But my question above was rather: are 
 there any
 other bugs rather than the above leakage, which is what the previous email 
 seemed to indicate?
 
 With regard to your solution - you are using
 
   int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
   {
   h-ring = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
   h-loss_count = h-loss_start = 0;
   return h-ring == NULL;
   }
 
 which is better not only for one but for two reasons. It solves the leakage 
 and in addition makes
 the entire code simpler. Fully agreed.

good
   
 
 | 
 |  | . I haven't checked if all the code was commited, as I tried to 
 introduce just what was
 |  |   immediatelly used, probably we'll need to do some changes when 
 working on the merge
 |  |   of your loss intervals code.
 |  Sorry I don't understand this point.
 | 
 | Let me check now and tell you for sure:
 | 
 | tfrc_rx_hist_delta_seqno and tfrc_rx_hist_swap were not included, as
 | they were not used, we should introduce them later, when getting to the
 | working on the loss interval code.
 Ah thanks, that was really not clear. Just beginning to work through the set.

great
 
 |static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff 
 *skb)
 |{
 |// ...
 |u32 sample, payload_size = skb-len - dccp_hdr(skb)-dccph_doff 
 * 4;
 |  
 |if (unlikely(hcrx-ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
 |if (is_data_packet) {
 |do_feedback = FBACK_INITIAL;
 |ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
 |ccid3_hc_rx_update_s(hcrx, payload_size);
 |}
 |goto update_records;
 |}
 |  
 |  == Non-data packets are ignored for the purposes of computing s (this is 
 in the RFC),
 |  consequently update_s() is only called for data packets; using the 
 two following
 |  functions:
 |  
 |  
 |static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 
 weight)
 |{
 |return avg ? (weight * avg + (10 - weight) * newval) / 10 : 
 newval;
 |}
 | 
 | I hadn't considered that tfrc_ewma would for every packet check if the
 | avg was 0 and I find it suboptimal now that I look at it, we are just
 | feeding data packets, no? 
 Yes exactly, only data packets are used for s.
 
 |static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, 
 int len)
 |{
 |if (likely(len  0))/* don't update on empty packets (e.g. 
 ACKs) */
 |hcrx-ccid3hcrx_s = tfrc_ewma(hcrx-ccid3hcrx_s, len, 
 9);
 |}
 | 
 | And we 

[PATCH 6/7] [CCID3]: The receiver of a half-connection does not set window counter values

2007-12-02 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

Only the sender sets window counters [RFC 4342, sections 5 and 8.1].

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index c95dca8..5ff5aab 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -733,7 +733,6 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, 
struct sk_buff *skb)
return 0;
 
hcrx = ccid3_hc_rx_sk(sk);
-   DCCP_SKB_CB(skb)-dccpd_ccval = hcrx-ccid3hcrx_ccval_last_counter;
 
if (dccp_packet_without_ack(skb))
return 0;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/7] [TFRC]: Provide central source file and debug facility

2007-12-02 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This patch changes the tfrc_lib module in the following manner:

 (1) a dedicated tfrc source file to call the packet history 
 loss interval init/exit functions.
 (2) a dedicated tfrc_pr_debug macro with toggle switch `tfrc_debug'.

Commiter note: renamed tfrc_module.c to tfrc.c, and made CONFIG_IP_DCCP_CCID3
select IP_DCCP_TFRC_LIB.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/Kconfig  |   13 ++---
 net/dccp/ccids/lib/Makefile |2 +-
 net/dccp/ccids/lib/packet_history.c |   27 ++-
 net/dccp/ccids/lib/packet_history.h |3 +-
 net/dccp/ccids/lib/tfrc.c   |   48 +++
 net/dccp/ccids/lib/tfrc.h   |   17 +---
 6 files changed, 75 insertions(+), 35 deletions(-)
 create mode 100644 net/dccp/ccids/lib/tfrc.c

diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 3d7d867..1227594 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -38,6 +38,7 @@ config IP_DCCP_CCID2_DEBUG
 config IP_DCCP_CCID3
tristate CCID3 (TCP-Friendly) (EXPERIMENTAL)
def_tristate IP_DCCP
+   select IP_DCCP_TFRC_LIB
---help---
  CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
  rate-controlled congestion control mechanism.  TFRC is designed to
@@ -63,10 +64,6 @@ config IP_DCCP_CCID3
 
  If in doubt, say M.
 
-config IP_DCCP_TFRC_LIB
-   depends on IP_DCCP_CCID3
-   def_tristate IP_DCCP_CCID3
-
 config IP_DCCP_CCID3_DEBUG
  bool CCID3 debugging messages
  depends on IP_DCCP_CCID3
@@ -110,5 +107,13 @@ config IP_DCCP_CCID3_RTO
is serious network congestion: experimenting with larger values 
should
therefore not be performed on WANs.
 
+config IP_DCCP_TFRC_LIB
+   tristate
+   default n
+
+config IP_DCCP_TFRC_DEBUG
+   bool
+   depends on IP_DCCP_TFRC_LIB
+   default y if IP_DCCP_CCID3_DEBUG
 
 endmenu
diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile
index 5f940a6..68c93e3 100644
--- a/net/dccp/ccids/lib/Makefile
+++ b/net/dccp/ccids/lib/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o
 
-dccp_tfrc_lib-y := loss_interval.o packet_history.o tfrc_equation.o
+dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o
diff --git a/net/dccp/ccids/lib/packet_history.c 
b/net/dccp/ccids/lib/packet_history.c
index 4805de9..1d4d6ee 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -35,7 +35,6 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include linux/module.h
 #include linux/string.h
 #include packet_history.h
 
@@ -277,39 +276,19 @@ void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct 
list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_purge);
 
-extern int __init dccp_li_init(void);
-extern void dccp_li_exit(void);
-
-static __init int packet_history_init(void)
+__init int packet_history_init(void)
 {
-   if (dccp_li_init() != 0)
-   goto out;
-
tfrc_tx_hist = kmem_cache_create(tfrc_tx_hist,
 sizeof(struct tfrc_tx_hist_entry), 0,
 SLAB_HWCACHE_ALIGN, NULL);
-   if (tfrc_tx_hist == NULL)
-   goto out_li_exit;
 
-   return 0;
-out_li_exit:
-   dccp_li_exit();
-out:
-   return -ENOBUFS;
+   return tfrc_tx_hist == NULL ? -ENOBUFS : 0;
 }
-module_init(packet_history_init);
 
-static __exit void packet_history_exit(void)
+void packet_history_exit(void)
 {
if (tfrc_tx_hist != NULL) {
kmem_cache_destroy(tfrc_tx_hist);
tfrc_tx_hist = NULL;
}
-   dccp_li_exit();
 }
-module_exit(packet_history_exit);
-
-MODULE_AUTHOR(Ian McDonald [EMAIL PROTECTED], 
- Arnaldo Carvalho de Melo [EMAIL PROTECTED]);
-MODULE_DESCRIPTION(DCCP TFRC library);
-MODULE_LICENSE(GPL);
diff --git a/net/dccp/ccids/lib/packet_history.h 
b/net/dccp/ccids/lib/packet_history.h
index 0670f46..9a2642e 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -39,8 +39,7 @@
 #include linux/ktime.h
 #include linux/list.h
 #include linux/slab.h
-
-#include ../../dccp.h
+#include tfrc.h
 
 /* Number of later packets received before one is considered lost */
 #define TFRC_RECV_NUM_LATE_LOSS 3
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
new file mode 100644
index 000..3a7a183
--- /dev/null
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -0,0 +1,48 @@
+/*
+ * TFRC: main module holding the pieces of the TFRC library together
+ *
+ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
+ * Copyright (c) 2007 Arnaldo Carvalho de Melo [EMAIL PROTECTED]
+ */
+#include linux

[PATCH 2/7] [DCCP]: Introduce generic function to test for `data packets'

2007-12-02 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

as per  RFC 4340, sec. 7.7.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/dccp.h |   12 
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index ee97950..f4a5ea1 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -334,6 +334,7 @@ struct dccp_skb_cb {
 
 #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)((__skb)-cb[0]))
 
+/* RFC 4340, sec. 7.7 */
 static inline int dccp_non_data_packet(const struct sk_buff *skb)
 {
const __u8 type = DCCP_SKB_CB(skb)-dccpd_type;
@@ -346,6 +347,17 @@ static inline int dccp_non_data_packet(const struct 
sk_buff *skb)
   type == DCCP_PKT_SYNCACK;
 }
 
+/* RFC 4340, sec. 7.7 */
+static inline int dccp_data_packet(const struct sk_buff *skb)
+{
+   const __u8 type = DCCP_SKB_CB(skb)-dccpd_type;
+
+   return type == DCCP_PKT_DATA ||
+  type == DCCP_PKT_DATAACK  ||
+  type == DCCP_PKT_REQUEST  ||
+  type == DCCP_PKT_RESPONSE;
+}
+
 static inline int dccp_packet_without_ack(const struct sk_buff *skb)
 {
const __u8 type = DCCP_SKB_CB(skb)-dccpd_type;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/7] [TFRC]: Rename dccp_rx_ to tfrc_rx_

2007-12-02 Thread Arnaldo Carvalho de Melo
This is in preparation for merging the new rx history code written by Gerrit 
Renker.

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c  |   32 ++--
 net/dccp/ccids/lib/loss_interval.c  |   14 +++---
 net/dccp/ccids/lib/packet_history.c |   90 +-
 net/dccp/ccids/lib/packet_history.h |   48 +-
 4 files changed, 92 insertions(+), 92 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 07920bb..c95dca8 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -677,7 +677,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
 {
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
-   struct dccp_rx_hist_entry *packet;
+   struct tfrc_rx_hist_entry *packet;
ktime_t now;
suseconds_t delta;
 
@@ -701,7 +701,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
return;
}
 
-   packet = dccp_rx_hist_find_data_packet(hcrx-ccid3hcrx_hist);
+   packet = tfrc_rx_hist_find_data_packet(hcrx-ccid3hcrx_hist);
if (unlikely(packet == NULL)) {
DCCP_WARN(%s(%p), no data packet in history!\n,
  dccp_role(sk), sk);
@@ -709,7 +709,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
}
 
hcrx-ccid3hcrx_tstamp_last_feedback = now;
-   hcrx-ccid3hcrx_ccval_last_counter   = packet-dccphrx_ccval;
+   hcrx-ccid3hcrx_ccval_last_counter   = packet-tfrchrx_ccval;
hcrx-ccid3hcrx_bytes_recv   = 0;
 
if (hcrx-ccid3hcrx_p == 0)
@@ -752,12 +752,12 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, 
struct sk_buff *skb)
 }
 
 static int ccid3_hc_rx_detect_loss(struct sock *sk,
-   struct dccp_rx_hist_entry *packet)
+   struct tfrc_rx_hist_entry *packet)
 {
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-   struct dccp_rx_hist_entry *rx_hist =
-   dccp_rx_hist_head(hcrx-ccid3hcrx_hist);
-   u64 seqno = packet-dccphrx_seqno;
+   struct tfrc_rx_hist_entry *rx_hist =
+   tfrc_rx_hist_head(hcrx-ccid3hcrx_hist);
+   u64 seqno = packet-tfrchrx_seqno;
u64 tmp_seqno;
int loss = 0;
u8 ccval;
@@ -766,9 +766,9 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
tmp_seqno = hcrx-ccid3hcrx_seqno_nonloss;
 
if (!rx_hist ||
-  follows48(packet-dccphrx_seqno, hcrx-ccid3hcrx_seqno_nonloss)) {
+  follows48(packet-tfrchrx_seqno, hcrx-ccid3hcrx_seqno_nonloss)) {
hcrx-ccid3hcrx_seqno_nonloss = seqno;
-   hcrx-ccid3hcrx_ccval_nonloss = packet-dccphrx_ccval;
+   hcrx-ccid3hcrx_ccval_nonloss = packet-tfrchrx_ccval;
goto detect_out;
}
 
@@ -789,7 +789,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
dccp_inc_seqno(tmp_seqno);
hcrx-ccid3hcrx_seqno_nonloss = tmp_seqno;
dccp_inc_seqno(tmp_seqno);
-   while (dccp_rx_hist_find_entry(hcrx-ccid3hcrx_hist,
+   while (tfrc_rx_hist_find_entry(hcrx-ccid3hcrx_hist,
   tmp_seqno, ccval)) {
hcrx-ccid3hcrx_seqno_nonloss = tmp_seqno;
hcrx-ccid3hcrx_ccval_nonloss = ccval;
@@ -799,13 +799,13 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 
/* FIXME - this code could be simplified with above while */
/* but works at moment */
-   if (follows48(packet-dccphrx_seqno, hcrx-ccid3hcrx_seqno_nonloss)) {
+   if (follows48(packet-tfrchrx_seqno, hcrx-ccid3hcrx_seqno_nonloss)) {
hcrx-ccid3hcrx_seqno_nonloss = seqno;
-   hcrx-ccid3hcrx_ccval_nonloss = packet-dccphrx_ccval;
+   hcrx-ccid3hcrx_ccval_nonloss = packet-tfrchrx_ccval;
}
 
 detect_out:
-   dccp_rx_hist_add_packet(hcrx-ccid3hcrx_hist,
+   tfrc_rx_hist_add_packet(hcrx-ccid3hcrx_hist,
hcrx-ccid3hcrx_li_hist, packet,
hcrx-ccid3hcrx_seqno_nonloss);
return loss;
@@ -815,7 +815,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
 {
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
const struct dccp_options_received *opt_recv;
-   struct dccp_rx_hist_entry *packet;
+   struct tfrc_rx_hist_entry *packet;
u32 p_prev, r_sample, rtt_prev;
int loss, payload_size;
ktime_t now;
@@ -850,7 +850,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
return;
}
 
-   packet = dccp_rx_hist_entry_new(opt_recv-dccpor_ndp, skb, GFP_ATOMIC);
+   packet = tfrc_rx_hist_entry_new(opt_recv-dccpor_ndp, skb, GFP_ATOMIC);
if (unlikely(packet == NULL

Re: [RFC]: tfrc_tx_hist_rtt

2007-11-30 Thread Arnaldo Carvalho de Melo
Em Fri, Nov 30, 2007 at 12:19:36PM +, Gerrit Renker escreveu:
 Sorry I only got this email today and it is a busy day, too.
 
 The changes look good and are in general a further improvement on the
 code. I like the idea of hiding the internals of the list structure in 
 the source file.
 
 I wonder if one could go one step further and also take the timestamp
 directly when looking up the previous history sample, e.g.
 
   u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno)
   {
   u32 rtt = 0;
   struct tfrc_tx_hist_entry *packet = 
 tfrc_tx_hist_find_entry(head, seqno);
   
   if (packet != NULL) {
   rtt = ktime_us_delta(ktime_get_real(), packet-stamp);
   /*
* Garbage-collect older (irrelevant) entries:
*/
   tfrc_tx_hist_purge(packet-next);
   }
   
   return rtt;
   }
 
 Just a suggestion.

I thought about that, but ccid3_hc_tx_packet_recv needs a timestamp some
lines below, when we reuse the ktime_get_real call.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC]: tfrc_tx_hist_rtt

2007-11-29 Thread Arnaldo Carvalho de Melo
Hi Gerrit,

Coming back to hiding the TX history data structures, take a
look at what I'm commiting to my tree:

1. struct tfrc_tx_hist_entry is completely hidden in
   net/dccp/ccids/lib/packet_history.c
2. I found tfrc_tx_hist_when confusing and returning two results, if it
   had found the ackno in the history and the timestamp, so I introduced
   tfrc_tx_hist_rtt instead, that returns 0 if the ackno was not found.

Does this looks reasonable?

- Arnaldo

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 2668de8..5dea690 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -399,7 +399,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
 {
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
struct ccid3_options_received *opt_recv;
-   struct tfrc_tx_hist_entry *packet;
ktime_t now;
unsigned long t_nfb;
u32 pinv, r_sample;
@@ -414,19 +413,17 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, 
struct sk_buff *skb)
switch (hctx-ccid3hctx_state) {
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
+   now = ktime_get_real();
+
/* estimate RTT from history if ACK number is valid */
-   packet = tfrc_tx_hist_find_entry(hctx-ccid3hctx_hist,
-
DCCP_SKB_CB(skb)-dccpd_ack_seq);
-   if (packet == NULL) {
+   r_sample = tfrc_tx_hist_rtt(hctx-ccid3hctx_hist,
+   DCCP_SKB_CB(skb)-dccpd_ack_seq, 
now);
+   if (r_sample == 0) {
DCCP_WARN(%s(%p): %s with bogus ACK-%llu\n, 
dccp_role(sk), sk,
  
dccp_packet_name(DCCP_SKB_CB(skb)-dccpd_type),
  (unsigned long 
long)DCCP_SKB_CB(skb)-dccpd_ack_seq);
return;
}
-   /*
-* Garbage-collect older (irrelevant) entries
-*/
-   tfrc_tx_hist_purge(packet-next);
 
/* Update receive rate in units of 64 * bytes/second */
hctx-ccid3hctx_x_recv = opt_recv-ccid3or_receive_rate;
@@ -438,12 +435,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, 
struct sk_buff *skb)
hctx-ccid3hctx_p = 0;
else   /* can not exceed 100% */
hctx-ccid3hctx_p = 100 / pinv;
-
-   now = ktime_get_real();
/*
-* Calculate new RTT sample and update moving average
+* Validate new RTT sample and update moving average
 */
-   r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, 
packet-stamp));
+   r_sample = dccp_sample_rtt(sk, r_sample);
hctx-ccid3hctx_rtt = tfrc_ewma(hctx-ccid3hctx_rtt, r_sample, 
9);
 
if (hctx-ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
diff --git a/net/dccp/ccids/lib/packet_history.c 
b/net/dccp/ccids/lib/packet_history.c
index 1397360..4805de9 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -39,12 +39,24 @@
 #include linux/string.h
 #include packet_history.h
 
+/**
+ *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
+ *  @next:  next oldest entry (LIFO order)
+ *  @seqno: sequence number of this entry
+ *  @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+   struct tfrc_tx_hist_entry *next;
+   u64   seqno;
+   ktime_t   stamp;
+};
+
 /*
  * Transmitter History Routines
  */
 static struct kmem_cache *tfrc_tx_hist;
 
-struct tfrc_tx_hist_entry *
+static struct tfrc_tx_hist_entry *
tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
 {
while (head != NULL  head-seqno != seqno)
@@ -52,7 +64,6 @@ struct tfrc_tx_hist_entry *
 
return head;
 }
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_find_entry);
 
 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
@@ -83,6 +94,24 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 }
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
 
+u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
+const ktime_t now)
+{
+   u32 rtt = 0;
+   struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, 
seqno);
+
+   if (packet != NULL) {
+   rtt = ktime_us_delta(now, packet-stamp);
+   /*
+* Garbage-collect older (irrelevant) entries:
+*/
+   tfrc_tx_hist_purge(packet-next);
+   }
+
+   return rtt;
+}
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
+
 /*
  * Receiver History Routines
  */
diff --git a/net/dccp/ccids/lib/packet_history.h 
b/net/dccp/ccids/lib/packet_history.h
index 5c07182..0670f46 100644
--- 

[PATCH 1/1][TFRC]: Hide tx history details from the CCIDs

2007-11-29 Thread Arnaldo Carvalho de Melo
Hi Herbert,

Please consider pulling from:

master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25

Thanks a lot!

- Arnaldo

commit f1ff09e7955a2cffa3dfdda8a63a1ce991f11a7b
Author: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
Date:   Thu Nov 29 22:47:15 2007 -0200

[TFRC]: Hide tx history details from the CCIDs

Based on a previous patch by Gerrit Renker.

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 2668de8..5dea690 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -399,7 +399,6 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
 {
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
struct ccid3_options_received *opt_recv;
-   struct tfrc_tx_hist_entry *packet;
ktime_t now;
unsigned long t_nfb;
u32 pinv, r_sample;
@@ -414,19 +413,17 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, 
struct sk_buff *skb)
switch (hctx-ccid3hctx_state) {
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
+   now = ktime_get_real();
+
/* estimate RTT from history if ACK number is valid */
-   packet = tfrc_tx_hist_find_entry(hctx-ccid3hctx_hist,
-
DCCP_SKB_CB(skb)-dccpd_ack_seq);
-   if (packet == NULL) {
+   r_sample = tfrc_tx_hist_rtt(hctx-ccid3hctx_hist,
+   DCCP_SKB_CB(skb)-dccpd_ack_seq, 
now);
+   if (r_sample == 0) {
DCCP_WARN(%s(%p): %s with bogus ACK-%llu\n, 
dccp_role(sk), sk,
  
dccp_packet_name(DCCP_SKB_CB(skb)-dccpd_type),
  (unsigned long 
long)DCCP_SKB_CB(skb)-dccpd_ack_seq);
return;
}
-   /*
-* Garbage-collect older (irrelevant) entries
-*/
-   tfrc_tx_hist_purge(packet-next);
 
/* Update receive rate in units of 64 * bytes/second */
hctx-ccid3hctx_x_recv = opt_recv-ccid3or_receive_rate;
@@ -438,12 +435,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, 
struct sk_buff *skb)
hctx-ccid3hctx_p = 0;
else   /* can not exceed 100% */
hctx-ccid3hctx_p = 100 / pinv;
-
-   now = ktime_get_real();
/*
-* Calculate new RTT sample and update moving average
+* Validate new RTT sample and update moving average
 */
-   r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, 
packet-stamp));
+   r_sample = dccp_sample_rtt(sk, r_sample);
hctx-ccid3hctx_rtt = tfrc_ewma(hctx-ccid3hctx_rtt, r_sample, 
9);
 
if (hctx-ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
diff --git a/net/dccp/ccids/lib/packet_history.c 
b/net/dccp/ccids/lib/packet_history.c
index 1397360..4805de9 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -39,12 +39,24 @@
 #include linux/string.h
 #include packet_history.h
 
+/**
+ *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
+ *  @next:  next oldest entry (LIFO order)
+ *  @seqno: sequence number of this entry
+ *  @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+   struct tfrc_tx_hist_entry *next;
+   u64   seqno;
+   ktime_t   stamp;
+};
+
 /*
  * Transmitter History Routines
  */
 static struct kmem_cache *tfrc_tx_hist;
 
-struct tfrc_tx_hist_entry *
+static struct tfrc_tx_hist_entry *
tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
 {
while (head != NULL  head-seqno != seqno)
@@ -52,7 +64,6 @@ struct tfrc_tx_hist_entry *
 
return head;
 }
-EXPORT_SYMBOL_GPL(tfrc_tx_hist_find_entry);
 
 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
 {
@@ -83,6 +94,24 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
 }
 EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
 
+u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
+const ktime_t now)
+{
+   u32 rtt = 0;
+   struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, 
seqno);
+
+   if (packet != NULL) {
+   rtt = ktime_us_delta(now, packet-stamp);
+   /*
+* Garbage-collect older (irrelevant) entries:
+*/
+   tfrc_tx_hist_purge(packet-next);
+   }
+
+   return rtt;
+}
+EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
+
 /*
  * Receiver History Routines
  */
diff --git a/net/dccp/ccids/lib/packet_history.h 
b/net/dccp/ccids/lib/packet_history.h
index 5c07182..0670f46 100644
--- a/net/dccp/ccids/lib

Re: [PATCH 1/4]: Use AF-independent rebuild_header routine

2007-11-28 Thread Arnaldo Carvalho de Melo
Em Wed, Nov 28, 2007 at 08:35:08AM +, Gerrit Renker escreveu:
 [DCCP]: Use AF-independent rebuild_header routine
 
 This fixes a nasty bug: dccp_send_reset() is called by both DCCPv4 and 
 DCCPv6, but uses
 inet_sk_rebuild_header() in each case. This leads to unpredictable and weird 
 behaviour:
 under some conditions, DCCPv6 Resets were sent, in other not.
 
 The fix is to use the AF-independent rebuild_header routine.
 
 Signed-off-by: Gerrit Renker [EMAIL PROTECTED]

Thanks, applied.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4]: Remove duplicate test for CloseReq

2007-11-28 Thread Arnaldo Carvalho de Melo
Em Wed, Nov 28, 2007 at 08:35:11AM +, Gerrit Renker escreveu:
 [DCCP]: Remove duplicate test for CloseReq
 
 This removes a redundant test for unexpected packet types. In 
 dccp_rcv_state_process
 it is tested twice whether a DCCP-server has received a CloseReq (Step 7):
 
  * first in the combined if-statement,
  * then in the call to dccp_rcv_closereq().
 
 The latter is necesssary since dccp_rcv_closereq() is also called from
 __dccp_rcv_established().
 
 This patch removes the duplicate test.
 
 Signed-off-by: Gerrit Renker [EMAIL PROTECTED]

Thanks, applied.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4]: Integrate state transitions for passive-close

2007-11-28 Thread Arnaldo Carvalho de Melo
Em Wed, Nov 28, 2007 at 08:35:10AM +, Gerrit Renker escreveu:
 [DCCP]: Integrate state transitions for passive-close
 
 This adds the necessary state transitions for the two forms of passive-close
 
  * PASSIVE_CLOSE- which is entered when a host   receives a Close;
  * PASSIVE_CLOSEREQ - which is entered when a client receives a CloseReq.
 
 Here is a detailed account of what the patch does in each state.
   
 1) Receiving CloseReq
 --
   The pseudo-code in 8.5 says:
 
  Step 13: Process CloseReq
   If P.type == CloseReq and S.state  CLOSEREQ,
   Generate Close
   S.state := CLOSING
   Set CLOSING timer.
 
   This means we need to address what to do in CLOSED, LISTEN, REQUEST, 
 RESPOND, PARTOPEN, and OPEN.
 
* CLOSED: silently ignore - it may be a late or duplicate CloseReq;
* LISTEN/RESPOND: will not appear, since Step 7 is performed first (we 
 know we are the client);
* REQUEST:perform Step 13 directly (no need to enqueue packet);
* OPEN/PARTOPEN:  enter PASSIVE_CLOSEREQ so that the application has a 
 chance to process unread data.
 
   When already in PASSIVE_CLOSEREQ, no second CloseReq is enqueued. In any 
 other state, the CloseReq is ignored.
   I think that this offers some robustness against rare and pathological 
 cases: e.g. a simultaneous close where
   the client sends a Close and the server a CloseReq. The client will then be 
 retransmitting its Close until it
   gets the Reset, so ignoring the CloseReq while in state CLOSING is sane.
   
 2) Receiving Close
 ---
   The code below from 8.5 is unconditional.
 
  Step 14: Process Close
   If P.type == Close,
   Generate Reset(Closed)
   Tear down connection
   Drop packet and return
 
   Thus we need to consider all states:
* CLOSED:   silently ignore, since this can happen when a 
 retransmitted or late Close arrives;
* LISTEN:   dccp_rcv_state_process() will generate a Reset (No 
 Connection);
* REQUEST:  perform Step 14 directly (no need to enqueue packet);
* RESPOND:  dccp_check_req() will generate a Reset (Packet 
 Error) -- left it at that;
* OPEN/PARTOPEN:enter PASSIVE_CLOSE so that application has a chance 
 to process unread data;
* CLOSEREQ: server performed active-close -- perform Step 14;
* CLOSING:  simultaneous-close: use a tie-breaker to avoid message 
 ping-pong (see comment);
* PASSIVE_CLOSEREQ: ignore - the peer has a bug (sending first a CloseReq 
 and now a Close);
* TIMEWAIT: packet is ignored.
 
Note that the condition of receiving a packet in state CLOSED here is 
 different from the condition there
is no socket for such a connection: the socket still exists, but its 
 state indicates it is unusable.
 
Last, dccp_finish_passive_close sets either DCCP_CLOSED or DCCP_CLOSING = 
 TCP_CLOSING, so that 
sk_stream_wait_close() will wait for the final Reset (which will trigger 
 CLOSING = CLOSED).
 
 Signed-off-by: Gerrit Renker [EMAIL PROTECTED]

Applied
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4]: Dedicated auxiliary states to support passive-close

2007-11-28 Thread Arnaldo Carvalho de Melo
Em Wed, Nov 28, 2007 at 08:35:09AM +, Gerrit Renker escreveu:
 [DCCP]: Dedicated auxiliary states to support passive-close
 
 This adds two auxiliary states to deal with passive closes:
   * PASSIVE_CLOSE(reached from OPEN via reception of Close)and
   * PASSIVE_CLOSEREQ (reached from OPEN via reception of CloseReq)
 as internal intermediate states.  
 
 These states are used to allow a receiver to process unread data before 
 acknowledging the received connection-termination-request (the 
 Close/CloseReq).
 
 Without such support, it will happen that passively-closed sockets enter 
 CLOSED
 state while there is still unprocessed data in the queue; leading to 
 unexpected
 and erratic API behaviour.
 
 PASSIVE_CLOSE has been mapped into TCPF_CLOSE_WAIT, so that the code will 
 seamlessly
 work with inet_accept() (which tests for this state).
 
 The state names are thanks to Arnaldo, who suggested this naming scheme 
 following
 an earlier revision of this patch.
 
 Signed-off-by: Gerrit Renker [EMAIL PROTECTED]

Thanks, applied.

- Arnaldo
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/5] [TFRC]: Migrate TX history to singly-linked lis

2007-11-28 Thread Arnaldo Carvalho de Melo
This patch was based on another made by Gerrit Renker, his changelog was:

--
The patch set migrates TFRC TX history to a singly-linked list.

The details are:
 * use of a consistent naming scheme (all TFRC functions now begin with 
`tfrc_');
 * allocation and cleanup are taken care of internally;
 * provision of a lookup function, which is used by the CCID TX infrastructure
   to determine the time a packet was sent (in turn used for RTT sampling);
 * integration of the new interface with the present use in CCID3.
--

Simplifications I did:

. removing the tfrc_tx_hist_head that had a pointer to the list head and
  another for the slabcache.
. No need for creating a slabcache for each CCID that wants to use the TFRC
  tx history routines, create a single slabcache when the dccp_tfrc_lib module
  init routine is called.

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid3.c  |   57 --
 net/dccp/ccids/ccid3.h  |3 +-
 net/dccp/ccids/lib/loss_interval.c  |   12 ++--
 net/dccp/ccids/lib/packet_history.c |  138 ---
 net/dccp/ccids/lib/packet_history.h |   79 
 5 files changed, 102 insertions(+), 187 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 964ec91..2668de8 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -49,7 +49,6 @@ static int ccid3_debug;
 #define ccid3_pr_debug(format, a...)
 #endif
 
-static struct dccp_tx_hist *ccid3_tx_hist;
 static struct dccp_rx_hist *ccid3_rx_hist;
 
 /*
@@ -389,28 +388,18 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int 
more,
unsigned int len)
 {
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-   struct dccp_tx_hist_entry *packet;
 
ccid3_hc_tx_update_s(hctx, len);
 
-   packet = dccp_tx_hist_entry_new(ccid3_tx_hist, GFP_ATOMIC);
-   if (unlikely(packet == NULL)) {
+   if (tfrc_tx_hist_add(hctx-ccid3hctx_hist, dccp_sk(sk)-dccps_gss))
DCCP_CRIT(packet history - out of memory!);
-   return;
-   }
-   dccp_tx_hist_add_entry(hctx-ccid3hctx_hist, packet);
-
-   packet-dccphtx_tstamp = ktime_get_real();
-   packet-dccphtx_seqno  = dccp_sk(sk)-dccps_gss;
-   packet-dccphtx_rtt= hctx-ccid3hctx_rtt;
-   packet-dccphtx_sent   = 1;
 }
 
 static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
struct ccid3_options_received *opt_recv;
-   struct dccp_tx_hist_entry *packet;
+   struct tfrc_tx_hist_entry *packet;
ktime_t now;
unsigned long t_nfb;
u32 pinv, r_sample;
@@ -425,16 +414,19 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, 
struct sk_buff *skb)
switch (hctx-ccid3hctx_state) {
case TFRC_SSTATE_NO_FBACK:
case TFRC_SSTATE_FBACK:
-   /* get packet from history to look up t_recvdata */
-   packet = dccp_tx_hist_find_entry(hctx-ccid3hctx_hist,
- DCCP_SKB_CB(skb)-dccpd_ack_seq);
-   if (unlikely(packet == NULL)) {
-   DCCP_WARN(%s(%p), seqno %llu(%s) doesn't exist 
- in history!\n,  dccp_role(sk), sk,
-   (unsigned long long)DCCP_SKB_CB(skb)-dccpd_ack_seq,
-   dccp_packet_name(DCCP_SKB_CB(skb)-dccpd_type));
+   /* estimate RTT from history if ACK number is valid */
+   packet = tfrc_tx_hist_find_entry(hctx-ccid3hctx_hist,
+
DCCP_SKB_CB(skb)-dccpd_ack_seq);
+   if (packet == NULL) {
+   DCCP_WARN(%s(%p): %s with bogus ACK-%llu\n, 
dccp_role(sk), sk,
+ 
dccp_packet_name(DCCP_SKB_CB(skb)-dccpd_type),
+ (unsigned long 
long)DCCP_SKB_CB(skb)-dccpd_ack_seq);
return;
}
+   /*
+* Garbage-collect older (irrelevant) entries
+*/
+   tfrc_tx_hist_purge(packet-next);
 
/* Update receive rate in units of 64 * bytes/second */
hctx-ccid3hctx_x_recv = opt_recv-ccid3or_receive_rate;
@@ -451,7 +443,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
/*
 * Calculate new RTT sample and update moving average
 */
-   r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, 
packet-dccphtx_tstamp));
+   r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, 
packet-stamp));
hctx-ccid3hctx_rtt = tfrc_ewma(hctx-ccid3hctx_rtt, r_sample, 
9);
 
if (hctx

[PATCH 4/5] [DCCP]: Integrate state transitions for passive-close

2007-11-28 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This adds the necessary state transitions for the two forms of passive-close

 * PASSIVE_CLOSE- which is entered when a host   receives a Close;
 * PASSIVE_CLOSEREQ - which is entered when a client receives a CloseReq.

Here is a detailed account of what the patch does in each state.

1) Receiving CloseReq

  The pseudo-code in 8.5 says:

 Step 13: Process CloseReq
  If P.type == CloseReq and S.state  CLOSEREQ,
  Generate Close
  S.state := CLOSING
  Set CLOSING timer.

  This means we need to address what to do in CLOSED, LISTEN, REQUEST, RESPOND, 
PARTOPEN, and OPEN.

   * CLOSED: silently ignore - it may be a late or duplicate CloseReq;
   * LISTEN/RESPOND: will not appear, since Step 7 is performed first (we know 
we are the client);
   * REQUEST:perform Step 13 directly (no need to enqueue packet);
   * OPEN/PARTOPEN:  enter PASSIVE_CLOSEREQ so that the application has a 
chance to process unread data.

  When already in PASSIVE_CLOSEREQ, no second CloseReq is enqueued. In any 
other state, the CloseReq is ignored.
  I think that this offers some robustness against rare and pathological cases: 
e.g. a simultaneous close where
  the client sends a Close and the server a CloseReq. The client will then be 
retransmitting its Close until it
  gets the Reset, so ignoring the CloseReq while in state CLOSING is sane.

2) Receiving Close

  The code below from 8.5 is unconditional.

 Step 14: Process Close
  If P.type == Close,
  Generate Reset(Closed)
  Tear down connection
  Drop packet and return

  Thus we need to consider all states:
   * CLOSED:   silently ignore, since this can happen when a 
retransmitted or late Close arrives;
   * LISTEN:   dccp_rcv_state_process() will generate a Reset (No 
Connection);
   * REQUEST:  perform Step 14 directly (no need to enqueue packet);
   * RESPOND:  dccp_check_req() will generate a Reset (Packet Error) 
-- left it at that;
   * OPEN/PARTOPEN:enter PASSIVE_CLOSE so that application has a chance to 
process unread data;
   * CLOSEREQ: server performed active-close -- perform Step 14;
   * CLOSING:  simultaneous-close: use a tie-breaker to avoid message 
ping-pong (see comment);
   * PASSIVE_CLOSEREQ: ignore - the peer has a bug (sending first a CloseReq 
and now a Close);
   * TIMEWAIT: packet is ignored.

   Note that the condition of receiving a packet in state CLOSED here is 
different from the condition there
   is no socket for such a connection: the socket still exists, but its state 
indicates it is unusable.

   Last, dccp_finish_passive_close sets either DCCP_CLOSED or DCCP_CLOSING = 
TCP_CLOSING, so that
   sk_stream_wait_close() will wait for the final Reset (which will trigger 
CLOSING = CLOSED).

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/linux/dccp.h |1 -
 net/dccp/input.c |   88 +
 net/dccp/proto.c |   88 +-
 3 files changed, 131 insertions(+), 46 deletions(-)

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 8b3f9ad..312b989 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -260,7 +260,6 @@ enum dccp_state {
 };
 
 #define DCCP_STATE_MASK 0x1f
-#define DCCP_ACTION_FIN (17)
 
 enum {
DCCPF_OPEN= TCPF_ESTABLISHED,
diff --git a/net/dccp/input.c b/net/dccp/input.c
index ef299fb..fe4b0fb 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -32,16 +32,56 @@ static void dccp_fin(struct sock *sk, struct sk_buff *skb)
sk-sk_data_ready(sk, 0);
 }
 
-static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
+static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
 {
-   dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
-   dccp_fin(sk, skb);
-   dccp_set_state(sk, DCCP_CLOSED);
-   sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+   int queued = 0;
+
+   switch (sk-sk_state) {
+   /*
+* We ignore Close when received in one of the following states:
+*  - CLOSED(may be a late or duplicate packet)
+*  - PASSIVE_CLOSEREQ  (the peer has sent a CloseReq earlier)
+*  - RESPOND   (already handled by dccp_check_req)
+*/
+   case DCCP_CLOSING:
+   /*
+* Simultaneous-close: receiving a Close after sending one. This
+* can happen if both client and server perform active-close and
+* will result in an endless ping-pong of crossing and retrans-
+* mitted Close packets, which only terminates when one of the
+* nodes times out (min. 64 seconds). Quicker convergence can be
+* achieved when one of the nodes acts as tie

[PATCH 3/5] [DCCP]: Dedicated auxiliary states to support passive-close

2007-11-28 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This adds two auxiliary states to deal with passive closes:
  * PASSIVE_CLOSE(reached from OPEN via reception of Close)and
  * PASSIVE_CLOSEREQ (reached from OPEN via reception of CloseReq)
as internal intermediate states.

These states are used to allow a receiver to process unread data before
acknowledging the received connection-termination-request (the Close/CloseReq).

Without such support, it will happen that passively-closed sockets enter CLOSED
state while there is still unprocessed data in the queue; leading to unexpected
and erratic API behaviour.

PASSIVE_CLOSE has been mapped into TCPF_CLOSE_WAIT, so that the code will
seamlessly work with inet_accept() (which tests for this state).

The state names are thanks to Arnaldo, who suggested this naming scheme
following an earlier revision of this patch.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 include/linux/dccp.h |   56 ++---
 net/dccp/proto.c |   22 ++-
 2 files changed, 51 insertions(+), 27 deletions(-)

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index a007326..8b3f9ad 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -227,29 +227,51 @@ struct dccp_so_feat {
 #include net/tcp_states.h
 
 enum dccp_state {
-   DCCP_OPEN   = TCP_ESTABLISHED,
-   DCCP_REQUESTING = TCP_SYN_SENT,
-   DCCP_LISTEN = TCP_LISTEN,
-   DCCP_RESPOND= TCP_SYN_RECV,
-   DCCP_CLOSING= TCP_CLOSING,
-   DCCP_TIME_WAIT  = TCP_TIME_WAIT,
-   DCCP_CLOSED = TCP_CLOSE,
-   DCCP_PARTOPEN   = TCP_MAX_STATES,
+   DCCP_OPEN= TCP_ESTABLISHED,
+   DCCP_REQUESTING  = TCP_SYN_SENT,
+   DCCP_LISTEN  = TCP_LISTEN,
+   DCCP_RESPOND = TCP_SYN_RECV,
+   /*
+* States involved in closing a DCCP connection:
+* 1) ACTIVE_CLOSEREQ is entered by a server sending a CloseReq.
+*
+* 2) CLOSING can have three different meanings (RFC 4340, 8.3):
+*  a. Client has performed active-close, has sent a Close to the server
+* from state OPEN or PARTOPEN, and is waiting for the final Reset
+* (in this case, SOCK_DONE == 1).
+*  b. Client is asked to perform passive-close, by receiving a CloseReq
+* in (PART)OPEN state. It sends a Close and waits for final Reset
+* (in this case, SOCK_DONE == 0).
+*  c. Server performs an active-close as in (a), keeps TIMEWAIT state.
+*
+* 3) The following intermediate states are employed to give passively
+*closing nodes a chance to process their unread data:
+*- PASSIVE_CLOSE(from OPEN = CLOSED) and
+*- PASSIVE_CLOSEREQ (from (PART)OPEN to CLOSING; case (b) above).
+*/
+   DCCP_ACTIVE_CLOSEREQ = TCP_FIN_WAIT1,
+   DCCP_PASSIVE_CLOSE   = TCP_CLOSE_WAIT,  /* any node receiving a Close */
+   DCCP_CLOSING = TCP_CLOSING,
+   DCCP_TIME_WAIT   = TCP_TIME_WAIT,
+   DCCP_CLOSED  = TCP_CLOSE,
+   DCCP_PARTOPEN= TCP_MAX_STATES,
+   DCCP_PASSIVE_CLOSEREQ,  /* clients receiving CloseReq */
DCCP_MAX_STATES
 };
 
-#define DCCP_STATE_MASK 0xf
+#define DCCP_STATE_MASK 0x1f
 #define DCCP_ACTION_FIN (17)
 
 enum {
-   DCCPF_OPEN   = TCPF_ESTABLISHED,
-   DCCPF_REQUESTING = TCPF_SYN_SENT,
-   DCCPF_LISTEN = TCPF_LISTEN,
-   DCCPF_RESPOND= TCPF_SYN_RECV,
-   DCCPF_CLOSING= TCPF_CLOSING,
-   DCCPF_TIME_WAIT  = TCPF_TIME_WAIT,
-   DCCPF_CLOSED = TCPF_CLOSE,
-   DCCPF_PARTOPEN   = 1  DCCP_PARTOPEN,
+   DCCPF_OPEN= TCPF_ESTABLISHED,
+   DCCPF_REQUESTING  = TCPF_SYN_SENT,
+   DCCPF_LISTEN  = TCPF_LISTEN,
+   DCCPF_RESPOND = TCPF_SYN_RECV,
+   DCCPF_ACTIVE_CLOSEREQ = TCPF_FIN_WAIT1,
+   DCCPF_CLOSING = TCPF_CLOSING,
+   DCCPF_TIME_WAIT   = TCPF_TIME_WAIT,
+   DCCPF_CLOSED  = TCPF_CLOSE,
+   DCCPF_PARTOPEN= (1  DCCP_PARTOPEN),
 };
 
 static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 73006b7..3489d3f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -60,8 +60,7 @@ void dccp_set_state(struct sock *sk, const int state)
 {
const int oldstate = sk-sk_state;
 
-   dccp_pr_debug(%s(%p) %-10.10s - %s\n,
- dccp_role(sk), sk,
+   dccp_pr_debug(%s(%p)  %s  --  %s\n, dccp_role(sk), sk,
  dccp_state_name(oldstate), dccp_state_name(state));
WARN_ON(state == oldstate);
 
@@ -134,14 +133,17 @@ EXPORT_SYMBOL_GPL(dccp_packet_name);
 const char *dccp_state_name(const int state)
 {
static char *dccp_state_names[] = {
-   [DCCP_OPEN]   = OPEN,
-   [DCCP_REQUESTING

[PATCH 2/5] Use AF-independent rebuild_header routine

2007-11-28 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

[DCCP]: Use AF-independent rebuild_header routine

This fixes a nasty bug: dccp_send_reset() is called by both DCCPv4 and DCCPv6, 
but uses
inet_sk_rebuild_header() in each case. This leads to unpredictable and weird 
behaviour:
under some conditions, DCCPv6 Resets were sent, in other not.

The fix is to use the AF-independent rebuild_header routine.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/output.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/dccp/output.c b/net/dccp/output.c
index 33ce737..7caa7f5 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -391,7 +391,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes 
code)
 * FIXME: what if rebuild_header fails?
 * Should we be doing a rebuild_header here?
 */
-   int err = inet_sk_rebuild_header(sk);
+   int err = inet_csk(sk)-icsk_af_ops-rebuild_header(sk);
 
if (err != 0)
return err;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHES 0/5]: DCCP patches for 2.6.25

2007-11-28 Thread Arnaldo Carvalho de Melo
Hi Herbert,

Please consider pulling from:

master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6.25

Best Regards,

- Arnaldo

 b/include/linux/dccp.h|   56 +
 b/net/dccp/ccids/ccid3.c  |   57 --
 b/net/dccp/ccids/ccid3.h  |3 
 b/net/dccp/ccids/lib/loss_interval.c  |   12 +-
 b/net/dccp/ccids/lib/packet_history.c |  138 +++---
 b/net/dccp/ccids/lib/packet_history.h |   80 +++
 b/net/dccp/input.c|   88 +
 b/net/dccp/output.c   |3 
 b/net/dccp/proto.c|   23 ++---
 include/linux/dccp.h  |1 
 net/dccp/input.c  |7 -
 net/dccp/proto.c  |   89 ++---
 12 files changed, 287 insertions(+), 270 deletions(-)
-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/5] [DCCP]: Remove duplicate test for CloseReq

2007-11-28 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This removes a redundant test for unexpected packet types. In 
dccp_rcv_state_process
it is tested twice whether a DCCP-server has received a CloseReq (Step 7):

 * first in the combined if-statement,
 * then in the call to dccp_rcv_closereq().

The latter is necesssary since dccp_rcv_closereq() is also called from
__dccp_rcv_established().

This patch removes the duplicate test.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/input.c |6 ++
 1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/dccp/input.c b/net/dccp/input.c
index fe4b0fb..decf2f2 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -629,16 +629,14 @@ int dccp_rcv_state_process(struct sock *sk, struct 
sk_buff *skb,
return 0;
/*
 *   Step 7: Check for unexpected packet types
-*  If (S.is_server and P.type == CloseReq)
-*  or (S.is_server and P.type == Response)
+*  If (S.is_server and P.type == Response)
 *  or (S.is_client and P.type == Request)
 *  or (S.state == RESPOND and P.type == Data),
 *Send Sync packet acknowledging P.seqno
 *Drop packet and return
 */
} else if ((dp-dccps_role != DCCP_ROLE_CLIENT 
-   (dh-dccph_type == DCCP_PKT_RESPONSE ||
-dh-dccph_type == DCCP_PKT_CLOSEREQ)) ||
+   dh-dccph_type == DCCP_PKT_RESPONSE) ||
(dp-dccps_role == DCCP_ROLE_CLIENT 
 dh-dccph_type == DCCP_PKT_REQUEST) ||
(sk-sk_state == DCCP_RESPOND 
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/18] [CCID2]: Don't assign negative values to Ack Ratio

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

Since it makes not sense to assign negative values to Ack Ratio, this
patch disallows this possibility.

As a consequence, a Bug test for negative Ack Ratio values becomes obsolete.

Furthermore, a check against overflow (as Ack Ratio may not exceed 2 bytes,
due to RFC 4340, 11.3) has been added.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Acked-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid2.c |9 +
 1 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 5552218..f18235e 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -140,7 +140,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct 
sk_buff *skb)
return 1; /* XXX CCID should dequeue when ready instead of polling */
 }
 
-static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
+static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
 {
struct dccp_sock *dp = dccp_sk(sk);
/*
@@ -159,9 +159,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int 
val)
if (val  max)
val = max;
}
+   if (val  0x)   /* RFC 4340, 11.3 */
+   val = 0x;
 
-   ccid2_pr_debug(changing local ack ratio to %d\n, val);
-   WARN_ON(val = 0);
+   ccid2_pr_debug(changing local ack ratio to %u\n, val);
dp-dccps_l_ack_ratio = val;
 }
 
@@ -572,7 +573,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
hctx-ccid2hctx_rpdupack = -1; /* XXX lame */
hctx-ccid2hctx_rpseq = 0;
 
-   ccid2_change_l_ack_ratio(sk, 
dp-dccps_l_ack_ratio  1);
+   ccid2_change_l_ack_ratio(sk, 2 * 
dp-dccps_l_ack_ratio);
}
}
}
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/18] [CCID2]: Fix sequence number arithmetic/comparisons

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This replaces use of normal subtraction with modulo-48 subtraction.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Acked-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid2.c |9 ++---
 1 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 7873dc7..5552218 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -24,9 +24,6 @@
 
 /*
  * This implementation should follow RFC 4341
- *
- * BUGS:
- * - sequence number wrapping
  */
 
 #include ../ccid.h
@@ -619,9 +616,8 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
/* go through this ack vector */
while (veclen--) {
const u8 rl = *vector  DCCP_ACKVEC_LEN_MASK;
-   u64 ackno_end_rl;
+   u64 ackno_end_rl = SUB48(ackno, rl);
 
-   dccp_set_seqno(ackno_end_rl, ackno - rl);
ccid2_pr_debug(ackvec start:%llu end:%llu\n,
   (unsigned long long)ackno,
   (unsigned long long)ackno_end_rl);
@@ -671,8 +667,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
if (done)
break;
 
-
-   dccp_set_seqno(ackno, ackno_end_rl - 1);
+   ackno = SUB48(ackno_end_rl, 1);
vector++;
}
if (done)
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/18] [CCID2]: Bug in reading Ack Vectors

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

In CCID2 the receiver-history is sorted in ascending order of sequence number,
but the processing of received Ack Vectors requires the list traversal in the
opposite direction.

The current code has a bug in this regard: the list traversal is upwards. As a
consequence, only Ack Vectors with a run length of 1 will pass, in all other
Ack Vectors the remaining (acked) sequence numbers are missed, and may later
falsely be identified as lost.

Note: This bug is only visible when Ack Ratio  1, since otherwise the run
  lengths of Ack Vectors are 0.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid2.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index c9c465e..7873dc7 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -666,7 +666,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
done = 1;
break;
}
-   seqp = seqp-ccid2s_next;
+   seqp = seqp-ccid2s_prev;
}
if (done)
break;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/18] [DCCP]: Initialize dccp_sock before calling the ccid constructors

2007-11-24 Thread Arnaldo Carvalho de Melo
This is because in the next patch CCID2 will assume that dccps_mss_cache is
non-zero.

Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/proto.c |   25 +
 1 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 0aec735..d48005f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -174,6 +174,19 @@ int dccp_init_sock(struct sock *sk, const __u8 
ctl_sock_initialized)
 
dccp_minisock_init(dp-dccps_minisock);
 
+   icsk-icsk_rto  = DCCP_TIMEOUT_INIT;
+   icsk-icsk_syn_retries  = sysctl_dccp_request_retries;
+   sk-sk_state= DCCP_CLOSED;
+   sk-sk_write_space  = dccp_write_space;
+   icsk-icsk_sync_mss = dccp_sync_mss;
+   dp-dccps_mss_cache = 536;
+   dp-dccps_rate_last = jiffies;
+   dp-dccps_role  = DCCP_ROLE_UNDEFINED;
+   dp-dccps_service   = DCCP_SERVICE_CODE_IS_ABSENT;
+   dp-dccps_l_ack_ratio   = dp-dccps_r_ack_ratio = 1;
+
+   dccp_init_xmit_timers(sk);
+
/*
 * FIXME: We're hardcoding the CCID, and doing this at this point makes
 * the listening (master) sock get CCID control blocks, which is not
@@ -213,18 +226,6 @@ int dccp_init_sock(struct sock *sk, const __u8 
ctl_sock_initialized)
INIT_LIST_HEAD(dmsk-dccpms_conf);
}
 
-   dccp_init_xmit_timers(sk);
-   icsk-icsk_rto  = DCCP_TIMEOUT_INIT;
-   icsk-icsk_syn_retries  = sysctl_dccp_request_retries;
-   sk-sk_state= DCCP_CLOSED;
-   sk-sk_write_space  = dccp_write_space;
-   icsk-icsk_sync_mss = dccp_sync_mss;
-   dp-dccps_mss_cache = 536;
-   dp-dccps_rate_last = jiffies;
-   dp-dccps_role  = DCCP_ROLE_UNDEFINED;
-   dp-dccps_service   = DCCP_SERVICE_CODE_IS_ABSENT;
-   dp-dccps_l_ack_ratio   = dp-dccps_r_ack_ratio = 1;
-
return 0;
 }
 
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/18] [CCID2]: Larger initial windows also for CCID2

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

RFC 4341, sec. 5 states that The cwnd parameter is initialized to at most
four packets for new connections, following the rules from [RFC3390], which
is implemented by this patch.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Acked-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid2.c |   20 +++-
 1 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index ef19fb8..9c5b6c7 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -741,15 +741,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, 
struct sk_buff *skb)
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
+   struct dccp_sock *dp = dccp_sk(sk);
+   u32 max_ratio;
 
-   ccid2_change_cwnd(hctx, 1);
-   /* Initialize ssthresh to infinity.  This means that we will exit the
-* initial slow-start after the first packet loss.  This is what we
-* want.
-*/
+   /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
hctx-ccid2hctx_ssthresh  = ~0;
hctx-ccid2hctx_numdupack = 3;
 
+   /*
+* RFC 4341, 5: The cwnd parameter is initialized to at most four
+* packets for new connections, following the rules from [RFC3390].
+* We need to convert the bytes of RFC3390 into the packets of RFC 4341.
+*/
+   hctx-ccid2hctx_cwnd = min(4U, max(2U, 4380U / dp-dccps_mss_cache));
+
+   /* Make sure that Ack Ratio is enabled and within bounds. */
+   max_ratio = DIV_ROUND_UP(hctx-ccid2hctx_cwnd, 2);
+   if (dp-dccps_l_ack_ratio == 0 || dp-dccps_l_ack_ratio  max_ratio)
+   dp-dccps_l_ack_ratio = max_ratio;
+
/* XXX init ~ to window size... */
if (ccid2_hc_tx_alloc_seq(hctx))
return -ENOMEM;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/18] [CCID2]: Disable broken Ack Ratio adaptation algorithm

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This comments out a problematic section comprising a half-finished algorithm:

 - The variable `ccid2hctx_ackloss' is never initialised to a value different 
from 0 and
   hence in fact is a read-only constant.
 - The `arsent' variable counts packets other than Acks (it is incremented for 
every packet),
   and there is no test for Ack Loss.
 - The concept of counting Acks as such leads to a complex calculation, and the 
calculation
   at the moment is inconsistent with this concept.
   The problem is that the number of Acks - rather than the number of windows - 
is counted,
   which leads to a complex (cubic/quadratic) expression - this is not even 
implemented.

In its current state, the commented-out algorithm interfers with normal 
processing by
changing Ack Ratio incorrectly, and at the wrong times.

A new algorithm is necessary, which will not necessarily use the same variables 
as used by
the unfinished one; hence the old variables have been removed.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Acked-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid2.c |   23 +--
 net/dccp/ccids/ccid2.h |4 
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9c5b6c7..a901202 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -224,8 +224,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
hctx-ccid2hctx_sent= 0;
 
/* clear ack ratio state. */
-   hctx-ccid2hctx_arsent   = 0;
-   hctx-ccid2hctx_ackloss  = 0;
hctx-ccid2hctx_rpseq= 0;
hctx-ccid2hctx_rpdupack = -1;
ccid2_change_l_ack_ratio(sk, 1);
@@ -289,6 +287,26 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int 
more, unsigned int len)
 
hctx-ccid2hctx_sent++;
 
+   /*
+* FIXME: The code below is broken and the variables have been removed
+* from the socket struct. The `ackloss' variable was always set to 0,
+* and with arsent there are several problems:
+*  (i) it doesn't just count the number of Acks, but all sent packets;
+*  (ii) it is expressed in # of packets, not # of windows, so the
+*  comparison below uses the wrong formula: Appendix A of RFC 4341
+*  comes up with the number K = cwnd / (R^2 - R) of consecutive windows
+*  of data with no lost or marked Ack packets. If arsent were the # of
+*  consecutive Acks received without loss, then Ack Ratio needs to be
+*  decreased by 1 when
+*arsent =  K * cwnd / R  =  cwnd^2 / (R^3 - R^2)
+*  where cwnd / R is the number of Acks received per window of data
+*  (cf. RFC 4341, App. A). The problems are that
+*  - arsent counts other packets as well;
+*  - the comparison uses a formula different from RFC 4341;
+*  - computing a cubic/quadratic equation each time is too complicated.
+*  Hence a different algorithm is needed.
+*/
+#if 0
/* Ack Ratio.  Need to maintain a concept of how many windows we sent */
hctx-ccid2hctx_arsent++;
/* We had an ack loss in this window... */
@@ -316,6 +334,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int 
more, unsigned int len)
hctx-ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
}
}
+#endif
 
/* setup RTO timer */
if (!timer_pending(hctx-ccid2hctx_rtotimer))
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index d9daa53..ed95f8f 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -44,8 +44,6 @@ struct ccid2_seq {
  * @ccid2hctx_acks - ACKS recv in AI phase
  * @ccid2hctx_sent - packets sent in this window
  * @ccid2hctx_lastrtt -time RTT was last measured
- * @ccid2hctx_arsent - packets sent [ack ratio]
- * @ccid2hctx_ackloss - ack was lost in this win
  * @ccid2hctx_rpseq - last consecutive seqno
  * @ccid2hctx_rpdupack - dupacks since rpseq
 */
@@ -66,8 +64,6 @@ struct ccid2_hc_tx_sock {
int ccid2hctx_sent;
unsigned long   ccid2hctx_lastrtt;
struct timer_list   ccid2hctx_rtotimer;
-   unsigned long   ccid2hctx_arsent;
-   int ccid2hctx_ackloss;
u64 ccid2hctx_rpseq;
int ccid2hctx_rpdupack;
int ccid2hctx_sendwait;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/18] [CCID2]: Replace read-only variable with constant

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This replaces the field member `numdupack', which was used as a read-only
constant in the code, with a #define.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid2.c |8 +++-
 net/dccp/ccids/ccid2.h |3 ++-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9fa0eb5..a3c42cd 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -586,8 +586,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
hctx-ccid2hctx_rpdupack++;
 
/* check if we got enough dupacks */
-   if (hctx-ccid2hctx_rpdupack =
-   hctx-ccid2hctx_numdupack) {
+   if (hctx-ccid2hctx_rpdupack = NUMDUPACK) {
hctx-ccid2hctx_rpdupack = -1; /* XXX lame */
hctx-ccid2hctx_rpseq = 0;
 
@@ -708,7 +707,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
while (1) {
if (seqp-ccid2s_acked) {
done++;
-   if (done == hctx-ccid2hctx_numdupack)
+   if (done == NUMDUPACK)
break;
}
if (seqp == hctx-ccid2hctx_seqt)
@@ -719,7 +718,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct 
sk_buff *skb)
/* If there are at least 3 acknowledgements, anything unacknowledged
 * below the last sequence number is considered lost
 */
-   if (done == hctx-ccid2hctx_numdupack) {
+   if (done == NUMDUPACK) {
struct ccid2_seq *last_acked = seqp;
 
/* check for lost packets */
@@ -761,7 +760,6 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock 
*sk)
 
/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
hctx-ccid2hctx_ssthresh  = ~0;
-   hctx-ccid2hctx_numdupack = 3;
 
/*
 * RFC 4341, 5: The cwnd parameter is initialized to at most four
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 6d0b4e3..443f08a 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -24,6 +24,8 @@
 #include linux/timer.h
 #include linux/types.h
 #include ../ccid.h
+/* NUMDUPACK parameter from RFC 4341, p. 6 */
+#define NUMDUPACK  3
 
 struct sock;
 
@@ -52,7 +54,6 @@ struct ccid2_hc_tx_sock {
int ccid2hctx_acks;
unsigned intccid2hctx_ssthresh;
int ccid2hctx_pipe;
-   int ccid2hctx_numdupack;
struct ccid2_seq*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
int ccid2hctx_seqbufc;
struct ccid2_seq*ccid2hctx_seqh;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 17/18] [DCCP]: Check for unread data on close

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This removes one FIXME with regard to close when there is still unread data.
The mechanism is implemented similar to TCP: with regard to DCCP-specifics,
a Reset with Code 2, Aborted is sent to the peer.

This corresponds in part to RFC 4340, 8.1.1 and 8.1.5.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/proto.c |   10 --
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index d48005f..5f47b45 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -891,6 +891,7 @@ void dccp_close(struct sock *sk, long timeout)
 {
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
+   u32 data_was_unread = 0;
int state;
 
lock_sock(sk);
@@ -913,12 +914,17 @@ void dccp_close(struct sock *sk, long timeout)
 * descriptor close, not protocol-sourced closes, because the
  *reader process may not have drained the data yet!
 */
-   /* FIXME: check for unread data */
while ((skb = __skb_dequeue(sk-sk_receive_queue)) != NULL) {
+   data_was_unread += skb-len;
__kfree_skb(skb);
}
 
-   if (sock_flag(sk, SOCK_LINGER)  !sk-sk_lingertime) {
+   if (data_was_unread) {
+   /* Unread data was tossed, send an appropriate Reset Code */
+   DCCP_WARN(DCCP: ABORT -- %u bytes unread\n, data_was_unread);
+   dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+   dccp_set_state(sk, DCCP_CLOSED);
+   } else if (sock_flag(sk, SOCK_LINGER)  !sk-sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk-sk_prot-disconnect(sk, 0);
} else if (dccp_close_state(sk)) {
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/18] [CCID2]: Replace pipe assignment-function with assignment

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

The function ccid2_change_pipe only does an assignment. This patch simplifies 
the code by
replacing the function with the assignment it performs.

Furthermore, the type of pipe is promoted from `signed' to unsigned (increasing 
the range).
As a result, a BUG_ON test for negative values now becomes obsolete (for safety 
not removed,
but replaced with a less annoying `DCCP_BUG').

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid2.c |   16 ++--
 net/dccp/ccids/ccid2.h |3 ++-
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 747fa1c..237007b 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -171,11 +171,6 @@ static void ccid2_change_srtt(struct ccid2_hc_tx_sock 
*hctx, long val)
hctx-ccid2hctx_srtt = val;
 }
 
-static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val)
-{
-   hctx-ccid2hctx_pipe = val;
-}
-
 static void ccid2_start_rto_timer(struct sock *sk);
 
 static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -205,11 +200,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
ccid2_start_rto_timer(sk);
 
/* adjust pipe, cwnd etc */
-   ccid2_change_pipe(hctx, 0);
hctx-ccid2hctx_ssthresh = hctx-ccid2hctx_cwnd / 2;
if (hctx-ccid2hctx_ssthresh  2)
hctx-ccid2hctx_ssthresh = 2;
hctx-ccid2hctx_cwnd = 1;
+   hctx-ccid2hctx_pipe = 0;
 
/* clear state about stuff we sent */
hctx-ccid2hctx_seqt= hctx-ccid2hctx_seqh;
@@ -248,8 +243,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int 
more, unsigned int len)
 
BUG_ON(!hctx-ccid2hctx_sendwait);
hctx-ccid2hctx_sendwait = 0;
-   ccid2_change_pipe(hctx, hctx-ccid2hctx_pipe + 1);
-   BUG_ON(hctx-ccid2hctx_pipe  0);
+   hctx-ccid2hctx_pipe++;
 
/* There is an issue.  What if another packet is sent between
 * packet_send() and packet_sent().  Then the sequence number would be
@@ -519,8 +513,10 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk)
 {
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
 
-   ccid2_change_pipe(hctx, hctx-ccid2hctx_pipe-1);
-   BUG_ON(hctx-ccid2hctx_pipe  0);
+   if (hctx-ccid2hctx_pipe == 0)
+   DCCP_BUG(pipe == 0);
+   else
+   hctx-ccid2hctx_pipe--;
 
if (hctx-ccid2hctx_pipe == 0)
ccid2_hc_tx_kill_rto_timer(sk);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index b72e955..bc659f0 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -42,6 +42,7 @@ struct ccid2_seq {
 
 /** struct ccid2_hc_tx_sock - CCID2 TX half connection
  *
+ * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
  * @ccid2hctx_ssacks - ACKs recv in slow start
  * @ccid2hctx_acks - ACKS recv in AI phase
  * @ccid2hctx_lastrtt -time RTT was last measured
@@ -51,9 +52,9 @@ struct ccid2_seq {
 struct ccid2_hc_tx_sock {
u32 ccid2hctx_cwnd;
u32 ccid2hctx_ssthresh;
+   u32 ccid2hctx_pipe;
int ccid2hctx_ssacks;
int ccid2hctx_acks;
-   int ccid2hctx_pipe;
struct ccid2_seq*ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
int ccid2hctx_seqbufc;
struct ccid2_seq*ccid2hctx_seqh;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/18] [DCCP]: Add support for abortive release

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This continues from the previous patch and adds support for actively aborting
a DCCP connection, using a Reset Code 2, Aborted to inform the peer of an
abortive release.

I have tried this in various client/server settings and it works as expected.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/proto.c |   15 +--
 1 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 5f47b45..73006b7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -276,6 +276,12 @@ static inline int dccp_listen_start(struct sock *sk, int 
backlog)
return inet_csk_listen_start(sk, backlog);
 }
 
+static inline int dccp_need_reset(int state)
+{
+   return state != DCCP_CLOSED  state != DCCP_LISTEN 
+  state != DCCP_REQUESTING;
+}
+
 int dccp_disconnect(struct sock *sk, int flags)
 {
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -286,10 +292,15 @@ int dccp_disconnect(struct sock *sk, int flags)
if (old_state != DCCP_CLOSED)
dccp_set_state(sk, DCCP_CLOSED);
 
-   /* ABORT function of RFC793 */
+   /*
+* This corresponds to the ABORT function of RFC793, sec. 3.8
+* TCP uses a RST segment, DCCP a Reset packet with Code 2, Aborted.
+*/
if (old_state == DCCP_LISTEN) {
inet_csk_listen_stop(sk);
-   /* FIXME: do the active reset thing */
+   } else if (dccp_need_reset(old_state)) {
+   dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+   sk-sk_err = ECONNRESET;
} else if (old_state == DCCP_REQUESTING)
sk-sk_err = ECONNRESET;
 
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/18] [CCID2]: Remove unused variable

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This removes a variable `ccid2hctx_sent' which is incremented but
never referenced/read (i.e., dead code).

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Ian McDonald [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ccids/ccid2.c |4 
 net/dccp/ccids/ccid2.h |2 --
 2 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index a901202..9fa0eb5 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -221,7 +221,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
hctx-ccid2hctx_seqt= hctx-ccid2hctx_seqh;
hctx-ccid2hctx_ssacks  = 0;
hctx-ccid2hctx_acks= 0;
-   hctx-ccid2hctx_sent= 0;
 
/* clear ack ratio state. */
hctx-ccid2hctx_rpseq= 0;
@@ -285,8 +284,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int 
more, unsigned int len)
ccid2_pr_debug(cwnd=%d pipe=%d\n, hctx-ccid2hctx_cwnd,
   hctx-ccid2hctx_pipe);
 
-   hctx-ccid2hctx_sent++;
-
/*
 * FIXME: The code below is broken and the variables have been removed
 * from the socket struct. The `ackloss' variable was always set to 0,
@@ -517,7 +514,6 @@ static inline void ccid2_new_ack(struct sock *sk,
ccid2_pr_debug(srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n,
   hctx-ccid2hctx_srtt, hctx-ccid2hctx_rttvar,
   hctx-ccid2hctx_rto, HZ, r);
-   hctx-ccid2hctx_sent = 0;
}
 
/* we got a new ack, so re-start RTO timer */
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index ed95f8f..6d0b4e3 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -42,7 +42,6 @@ struct ccid2_seq {
  *
  * @ccid2hctx_ssacks - ACKs recv in slow start
  * @ccid2hctx_acks - ACKS recv in AI phase
- * @ccid2hctx_sent - packets sent in this window
  * @ccid2hctx_lastrtt -time RTT was last measured
  * @ccid2hctx_rpseq - last consecutive seqno
  * @ccid2hctx_rpdupack - dupacks since rpseq
@@ -61,7 +60,6 @@ struct ccid2_hc_tx_sock {
longccid2hctx_rto;
longccid2hctx_srtt;
longccid2hctx_rttvar;
-   int ccid2hctx_sent;
unsigned long   ccid2hctx_lastrtt;
struct timer_list   ccid2hctx_rtotimer;
u64 ccid2hctx_rpseq;
-- 
1.5.3.4

-
To unsubscribe from this list: send the line unsubscribe dccp in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/18] [ACKVEC]: Reduce length of identifiers

2007-11-24 Thread Arnaldo Carvalho de Melo
From: Gerrit Renker [EMAIL PROTECTED]

This is reduces the length of the struct ackvec/ackvec_record fields. It is
a purely text-based replacement:

s#dccpavr_#avr_#g;
s#dccpav_#av_#g;

and increases readability somewhat.

Signed-off-by: Gerrit Renker [EMAIL PROTECTED]
Signed-off-by: Arnaldo Carvalho de Melo [EMAIL PROTECTED]
---
 net/dccp/ackvec.c |  163 +
 net/dccp/ackvec.h |   62 ++--
 2 files changed, 108 insertions(+), 117 deletions(-)

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 83378f3..6de4bd1 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -30,7 +30,7 @@ static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 
if (avr != NULL)
-   INIT_LIST_HEAD(avr-dccpavr_node);
+   INIT_LIST_HEAD(avr-avr_node);
 
return avr;
 }
@@ -40,7 +40,7 @@ static void dccp_ackvec_record_delete(struct 
dccp_ackvec_record *avr)
if (unlikely(avr == NULL))
return;
/* Check if deleting a linked record */
-   WARN_ON(!list_empty(avr-dccpavr_node));
+   WARN_ON(!list_empty(avr-avr_node));
kmem_cache_free(dccp_ackvec_record_slab, avr);
 }
 
@@ -52,16 +52,15 @@ static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
 * just add the AVR at the head of the list.
 * -sorbo.
 */
-   if (!list_empty(av-dccpav_records)) {
+   if (!list_empty(av-av_records)) {
const struct dccp_ackvec_record *head =
-   list_entry(av-dccpav_records.next,
+   list_entry(av-av_records.next,
   struct dccp_ackvec_record,
-  dccpavr_node);
-   BUG_ON(before48(avr-dccpavr_ack_seqno,
-   head-dccpavr_ack_seqno));
+  avr_node);
+   BUG_ON(before48(avr-avr_ack_seqno, head-avr_ack_seqno));
}
 
-   list_add(avr-dccpavr_node, av-dccpav_records);
+   list_add(avr-avr_node, av-av_records);
 }
 
 int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
@@ -69,9 +68,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff 
*skb)
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_ackvec *av = dp-dccps_hc_rx_ackvec;
/* Figure out how many options do we need to represent the ackvec */
-   const u16 nr_opts = DIV_ROUND_UP(av-dccpav_vec_len,
-DCCP_MAX_ACKVEC_OPT_LEN);
-   u16 len = av-dccpav_vec_len + 2 * nr_opts, i;
+   const u16 nr_opts = DIV_ROUND_UP(av-av_vec_len, 
DCCP_MAX_ACKVEC_OPT_LEN);
+   u16 len = av-av_vec_len + 2 * nr_opts, i;
u32 elapsed_time;
const unsigned char *tail, *from;
unsigned char *to;
@@ -81,7 +79,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff 
*skb)
if (DCCP_SKB_CB(skb)-dccpd_opt_len + len  DCCP_MAX_OPT_LEN)
return -1;
 
-   delta = ktime_us_delta(ktime_get_real(), av-dccpav_time);
+   delta = ktime_us_delta(ktime_get_real(), av-av_time);
elapsed_time = delta / 10;
 
if (elapsed_time != 0 
@@ -95,9 +93,9 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff 
*skb)
DCCP_SKB_CB(skb)-dccpd_opt_len += len;
 
to   = skb_push(skb, len);
-   len  = av-dccpav_vec_len;
-   from = av-dccpav_buf + av-dccpav_buf_head;
-   tail = av-dccpav_buf + DCCP_MAX_ACKVEC_LEN;
+   len  = av-av_vec_len;
+   from = av-av_buf + av-av_buf_head;
+   tail = av-av_buf + DCCP_MAX_ACKVEC_LEN;
 
for (i = 0; i  nr_opts; ++i) {
int copylen = len;
@@ -116,7 +114,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct 
sk_buff *skb)
to  += tailsize;
len -= tailsize;
copylen -= tailsize;
-   from= av-dccpav_buf;
+   from= av-av_buf;
}
 
memcpy(to, from, copylen);
@@ -134,19 +132,19 @@ int dccp_insert_option_ackvec(struct sock *sk, struct 
sk_buff *skb)
 *  buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
 *  equal buf_nonce.
 */
-   avr-dccpavr_ack_seqno = DCCP_SKB_CB(skb)-dccpd_seq;
-   avr-dccpavr_ack_ptr   = av-dccpav_buf_head;
-   avr-dccpavr_ack_ackno = av-dccpav_buf_ackno;
-   avr-dccpavr_ack_nonce = av-dccpav_buf_nonce;
-   avr-dccpavr_sent_len  = av-dccpav_vec_len;
+   avr-avr_ack_seqno = DCCP_SKB_CB(skb)-dccpd_seq;
+   avr-avr_ack_ptr   = av-av_buf_head;
+   avr-avr_ack_ackno = av-av_buf_ackno;
+   avr-avr_ack_nonce = av-av_buf_nonce;
+   avr-avr_sent_len  = av

  1   2   3   4   5   >