Re: Refactor TCP partial ACK handling

2017-10-20 Thread Klemens Nanni
On Fri, Oct 20, 2017 at 09:07:20PM +0200, Mike Belopuhov wrote:
> This is a small and non-intrusive refactoring of partial ACK handling
> but it certainly doesn't look like one.  It's intended to be applied
> after the TCP SACK diff that I sent earlier and basically moves the
> conditional (SEQ_LT(th->th_ack, tp->snd_last)) out of tcp_sack_partialack
> and tcp_newreno into tcp_input itself, making these two functions just
> do the work and letting tcp_input make the decisions.  Here's how it
> looks after refactoring:
> 
> 	if (tp->t_dupacks >= tcprexmtthresh) {
> 		if (SEQ_LT(th->th_ack, tp->snd_last)) {
> 			if (tp->sack_enable)
> 				tcp_sack_partialack(tp, th);
> 			else
> 				tcp_newreno_partialack(tp, th);
> 		} else {
> 			/* Out of fast recovery */
> 			tp->snd_cwnd = tp->snd_ssthresh;
> 			if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
> 			    tp->snd_ssthresh)
> 				tp->snd_cwnd =
> 				    tcp_seq_subtract(tp->snd_max,
> 				    th->th_ack);
> 			tp->t_dupacks = 0;
> #ifdef TCP_FACK
> 			if (tp->sack_enable &&
> 			    SEQ_GT(th->th_ack, tp->snd_fack))
> 				tp->snd_fack = th->th_ack;
> #endif
> 		}
> 	} else {
> 		/*
> 		 * Reset the duplicate ACK counter if we
> 		 * were not in fast recovery.
> 		 */
> 		tp->t_dupacks = 0;
> 	}
> 
> This makes it possible to consolidate the "out of fast recovery" branch,
> currently repeated twice, and to show how tcp_sack_partialack and
> tcp_newreno interact without extra clutter.  The true branch of the old
> condition "if (tcp_sack_partialack(tp, th))" gets integrated into
> tcp_sack_partialack itself, and tcp_newreno is renamed for consistency.
> 
> The diff also hooks up the "if (tp->t_dupacks < tcprexmtthresh)" branch
> to this "main if", since t_dupacks is either greater than or equal to
> tcprexmtthresh or not.
> 
> In the end there is no (intentional) logic change at all, but the gained
> clarity is quite substantial, as the FreeBSD folks have noticed as well.
> Consolidating the 'out of fast recovery' branch is also beneficial for
> later work.
> 
> OK?
> 
> diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
> index 9951923bbdb..84cdb35f048 100644
> --- sys/netinet/tcp_input.c
> +++ sys/netinet/tcp_input.c
> @@ -1664,52 +1664,38 @@ trimthenstep6:
>   }
>   /*
>* If the congestion window was inflated to account
>* for the other side's cached packets, retract it.
>*/
> - if (tp->sack_enable) {
> - if (tp->t_dupacks >= tcprexmtthresh) {
> - /* Check for a partial ACK */
> - if (tcp_sack_partialack(tp, th)) {
> -#ifdef TCP_FACK
> - /* Force call to tcp_output */
> - if (tp->snd_awnd < tp->snd_cwnd)
> - tp->t_flags |= TF_NEEDOUTPUT;
> -#else
> - tp->snd_cwnd += tp->t_maxseg;
> - tp->t_flags |= TF_NEEDOUTPUT;
> -#endif /* TCP_FACK */
> - } else {
> - /* Out of fast recovery */
> - tp->snd_cwnd = tp->snd_ssthresh;
> - if (tcp_seq_subtract(tp->snd_max,
> - th->th_ack) < tp->snd_ssthresh)
> - tp->snd_cwnd =
> -tcp_seq_subtract(tp->snd_max,
> -th->th_ack);
> - tp->t_dupacks = 0;
> -#ifdef TCP_FACK
> - if (SEQ_GT(th->th_ack, tp->snd_fack))
> - tp->snd_fack = th->th_ack;
> -#endif
> - }
> - }
> - } else {
> - if (tp->t_dupacks >= tcprexmtthresh &&
> - !tcp_newreno(tp, th)) {
> + if (tp->t_dupacks >= tcprexmtthresh) {
> + if (SEQ_LT(th->th_ack, tp->snd_last)) {
> + if (tp->sack_enable)
> + tcp_sack_partialack(tp, th);
> + else
> + tcp_newreno_partialack(tp, th);
> + } else {
>   /* Out of fast recovery */
>   tp->snd_cwnd = tp->snd_ssthresh;
> 

Refactor TCP partial ACK handling

2017-10-20 Thread Mike Belopuhov
This is a small and non-intrusive refactoring of partial ACK handling
but it certainly doesn't look like one.  It's intended to be applied
after the TCP SACK diff that I sent earlier and basically moves the
conditional (SEQ_LT(th->th_ack, tp->snd_last)) out of tcp_sack_partialack
and tcp_newreno into tcp_input itself, making these two functions just
do the work and letting tcp_input make the decisions.  Here's how it
looks after refactoring:

	if (tp->t_dupacks >= tcprexmtthresh) {
		if (SEQ_LT(th->th_ack, tp->snd_last)) {
			if (tp->sack_enable)
				tcp_sack_partialack(tp, th);
			else
				tcp_newreno_partialack(tp, th);
		} else {
			/* Out of fast recovery */
			tp->snd_cwnd = tp->snd_ssthresh;
			if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
			    tp->snd_ssthresh)
				tp->snd_cwnd =
				    tcp_seq_subtract(tp->snd_max,
				    th->th_ack);
			tp->t_dupacks = 0;
#ifdef TCP_FACK
			if (tp->sack_enable &&
			    SEQ_GT(th->th_ack, tp->snd_fack))
				tp->snd_fack = th->th_ack;
#endif
		}
	} else {
		/*
		 * Reset the duplicate ACK counter if we
		 * were not in fast recovery.
		 */
		tp->t_dupacks = 0;
	}

This makes it possible to consolidate the "out of fast recovery" branch,
currently repeated twice, and to show how tcp_sack_partialack and
tcp_newreno interact without extra clutter.  The true branch of the old
condition "if (tcp_sack_partialack(tp, th))" gets integrated into
tcp_sack_partialack itself, and tcp_newreno is renamed for consistency.

The diff also hooks up the "if (tp->t_dupacks < tcprexmtthresh)" branch
to this "main if", since t_dupacks is either greater than or equal to
tcprexmtthresh or not.

In the end there is no (intentional) logic change at all, but the gained
clarity is quite substantial, as the FreeBSD folks have noticed as well.
Consolidating the 'out of fast recovery' branch is also beneficial for
later work.
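
For reference, here is a rough sketch of the sequence-space helpers the
snippet above relies on (a paraphrase, not the exact kernel definitions):
SEQ_LT() compares wrapping 32-bit sequence numbers in signed arithmetic,
so SEQ_LT(th->th_ack, tp->snd_last) asks whether the ACK still lies below
the recovery point, and tcp_seq_subtract(tp->snd_max, th->th_ack) yields
the amount of data still outstanding, which is what the deflated cwnd is
clamped to in the "out of fast recovery" branch.

	#include <stdint.h>

	typedef uint32_t tcp_seq;

	/* Signed wrap-around comparison, in the spirit of tcp_seq.h. */
	#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
	#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

	/* Distance from b to a in sequence space, e.g. snd_max - th_ack. */
	static inline int
	tcp_seq_subtract(tcp_seq a, tcp_seq b)
	{
		return ((int32_t)(a - b));
	}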

OK?

diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
index 9951923bbdb..84cdb35f048 100644
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -1664,52 +1664,38 @@ trimthenstep6:
}
/*
 * If the congestion window was inflated to account
 * for the other side's cached packets, retract it.
 */
-   if (tp->sack_enable) {
-   if (tp->t_dupacks >= tcprexmtthresh) {
-   /* Check for a partial ACK */
-   if (tcp_sack_partialack(tp, th)) {
-#ifdef TCP_FACK
-   /* Force call to tcp_output */
-   if (tp->snd_awnd < tp->snd_cwnd)
-   tp->t_flags |= TF_NEEDOUTPUT;
-#else
-   tp->snd_cwnd += tp->t_maxseg;
-   tp->t_flags |= TF_NEEDOUTPUT;
-#endif /* TCP_FACK */
-   } else {
-   /* Out of fast recovery */
-   tp->snd_cwnd = tp->snd_ssthresh;
-   if (tcp_seq_subtract(tp->snd_max,
-   th->th_ack) < tp->snd_ssthresh)
-   tp->snd_cwnd =
-  tcp_seq_subtract(tp->snd_max,
-  th->th_ack);
-   tp->t_dupacks = 0;
-#ifdef TCP_FACK
-   if (SEQ_GT(th->th_ack, tp->snd_fack))
-   tp->snd_fack = th->th_ack;
-#endif
-   }
-   }
-   } else {
-   if (tp->t_dupacks >= tcprexmtthresh &&
-   !tcp_newreno(tp, th)) {
+   if (tp->t_dupacks >= tcprexmtthresh) {
+   if (SEQ_LT(th->th_ack, tp->snd_last)) {
+   if (tp->sack_enable)
+   tcp_sack_partialack(tp, th);
+   else
+   tcp_newreno_partialack(tp, th);
+   } else {
/* Out of fast recovery */
tp->snd_cwnd = tp->snd_ssthresh;
if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
tp->snd_ssthresh)

Re: close cron sockets in child processes

2017-10-20 Thread Todd C. Miller
On Fri, 20 Oct 2017 16:25:32 +0200, Florian Riehm wrote:

> cron(8) opens /var/run/cron.sock for communication with crontab(1).
> The forked cron jobs still have the socket open.
> This prevents restarting cron while a job is running:
> (CRON) DEATH (already running)
> 
> I think cron's children should not inherit sockets.

There is a similar issue with at jobs.  Here's a comprehensive diff.

 - todd

Index: usr.sbin/cron/atrun.c
===================================================================
RCS file: /cvs/src/usr.sbin/cron/atrun.c,v
retrieving revision 1.46
diff -u -p -u -r1.46 atrun.c
--- usr.sbin/cron/atrun.c   8 Jun 2017 16:23:39 -   1.46
+++ usr.sbin/cron/atrun.c   20 Oct 2017 15:09:57 -
@@ -283,6 +283,9 @@ run_job(const atjob *job, int dfd, const
return;
}
 
+   /* close fds opened by the parent (e.g. cronSock) */
+   closefrom(3);
+
/*
 * We don't want the main cron daemon to wait for our children--
 * we will do it ourselves via waitpid().
Index: usr.sbin/cron/do_command.c
===================================================================
RCS file: /cvs/src/usr.sbin/cron/do_command.c,v
retrieving revision 1.56
diff -u -p -u -r1.56 do_command.c
--- usr.sbin/cron/do_command.c  17 Nov 2015 22:31:44 -  1.56
+++ usr.sbin/cron/do_command.c  20 Oct 2017 15:10:03 -
@@ -86,6 +86,9 @@ child_process(entry *e, user *u)
/* mark ourselves as different to PS command watchers */
setproctitle("running job");
 
+   /* close fds opened by the parent (e.g. cronSock) */
+   closefrom(3);
+
/* discover some useful and important environment settings
 */
usernm = e->pwd->pw_name;
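
For completeness, here is a rough sketch of the two usual ways to keep
such descriptors out of the children; the function names are illustrative
only and not taken from the cron source.  One advantage of the closefrom()
approach used in the diff is that it also catches any other descriptors
the daemon may have open.

	#include <sys/socket.h>
	#include <unistd.h>

	/*
	 * Option 1: mark the descriptor close-on-exec when creating it,
	 * so it disappears once the child execs the job's shell.
	 */
	static int
	open_cron_sock(void)
	{
		return socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
	}

	/*
	 * Option 2 (what the diff does): after fork(), close everything
	 * above stderr in the child, inherited sockets included.
	 */
	static void
	child_close_fds(void)
	{
		closefrom(3);
	}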



Re: close cron sockets in child processes

2017-10-20 Thread Jeremie Courreges-Anglas
On Fri, Oct 20 2017, Florian Riehm  wrote:
> Hi,
>
> cron(8) opens /var/run/cron.sock for communication with crontab(1).
> The forked cron jobs still have the socket open.
> This prevents restarting cron while a job is running:
> (CRON) DEATH (already running)

Hah.

> I think cron's children should not inherit sockets.
>
> ok?

Looks reasonable, ok jca@

(I had to apply your diff manually, though.  Could you please add
a newline between setproctitle(3) and your comment?)

> friehm
>
> Index: usr.sbin/cron/do_command.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/cron/do_command.c,v
> retrieving revision 1.56
> diff -u -p -r1.56 do_command.c
> --- usr.sbin/cron/do_command.c17 Nov 2015 22:31:44 -  1.56
> +++ usr.sbin/cron/do_command.c20 Oct 2017 13:56:27 -
> @@ -86,6 +86,9 @@ child_process(entry *e, user *u)
>   /* mark ourselves as different to PS command watchers */
>   setproctitle("running job");
>  +/* close sockets from parent (i.e. cronSock) */
> + closefrom(3);
> +
>   /* discover some useful and important environment settings
>*/
>   usernm = e->pwd->pw_name;
>

-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE



close cron sockets in child processes

2017-10-20 Thread Florian Riehm

Hi,

cron(8) opens /var/run/cron.sock for communication with crontab(1).
The forked cron jobs still have the socket open.
This prevents restarting cron while a job is running:
(CRON) DEATH (already running)

I think cron's children should not inherit sockets.

ok?

friehm

Index: usr.sbin/cron/do_command.c
===================================================================
RCS file: /cvs/src/usr.sbin/cron/do_command.c,v
retrieving revision 1.56
diff -u -p -r1.56 do_command.c
--- usr.sbin/cron/do_command.c  17 Nov 2015 22:31:44 -  1.56
+++ usr.sbin/cron/do_command.c  20 Oct 2017 13:56:27 -
@@ -86,6 +86,9 @@ child_process(entry *e, user *u)
/* mark ourselves as different to PS command watchers */
setproctitle("running job");
 
+	/* close sockets from parent (i.e. cronSock) */

+   closefrom(3);
+
/* discover some useful and important environment settings
 */
usernm = e->pwd->pw_name;



Re: tftpd(8): diff for ip path rewrite

2017-10-20 Thread Jeremie Courreges-Anglas
On Thu, Oct 19 2017, Stuart Henderson  wrote:
> On 2017/10/19 16:22, Theo de Raadt wrote:
>> I am always worried by non-intuitive magic behaviour.
>> 
>> It may serve some obvious purposes, but for someone else it is going
>> to break things.
>> 
>> I worry.
>
> The IP/filename -> filename fallback method seems good enough, but
> I agree with Theo.
>
> I think it would be safer behind a flag rather than on by default.

Same here.

-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE



Re: tftpd(8): diff for ip path rewrite

2017-10-20 Thread Jeremie Courreges-Anglas
On Fri, Oct 20 2017, Sebastien Marie  wrote:
> On Thu, Oct 19, 2017 at 08:58:12PM +0200, Jan Klemkow wrote:
>> 
>> Index: tftpd.c
>> ===================================================================
>> RCS file: /mount/openbsd/cvs/src/usr.sbin/tftpd/tftpd.c,v
>> retrieving revision 1.39
>> diff -u -p -r1.39 tftpd.c
>> --- tftpd.c  26 May 2017 17:38:46 -  1.39
>> +++ tftpd.c  19 Oct 2017 18:27:24 -
>> @@ -903,8 +903,17 @@ again:
>>  
>>  if (rwmap != NULL)
>>  rewrite_map(client, filename);
>> -else
>> -tftp_open(client, filename);
>> +else {
>> +char nfilename[PATH_MAX];
>> +
>> +snprintf(nfilename, sizeof nfilename, "%s/%s",
>> +    getip(&client->ss), filename);
>
> - filename has PATH_MAX length
> - getip(&client->ss) could have NI_MAXHOST length

INET6_ADDRSTRLEN since getip() calls getnameinfo(NI_NUMERICHOST), but
your point stands.

> so nfilename could be larger than PATH_MAX (sizeof nfilename).
>
> I assume the return of snprintf() needs to be checked.  If truncation
> occurred, a warning should be issued and nfilename discarded (just
> calling tftp_open(client, filename))?

I think we should refuse the request if truncated.
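
Something along these lines, perhaps (an illustrative sketch only; the
helper name and error handling are made up, not a proposal for the actual
tftpd.c code):

	#include <stdio.h>

	/*
	 * Build "ip/filename" and report truncation instead of silently
	 * falling back.  Returns 0 on success, -1 if the result would
	 * not fit in dst.
	 */
	static int
	compose_client_path(char *dst, size_t dstlen, const char *ip,
	    const char *filename)
	{
		int len;

		len = snprintf(dst, dstlen, "%s/%s", ip, filename);
		if (len < 0 || (size_t)len >= dstlen)
			return -1;	/* refuse the request */
		return 0;
	}

On failure the caller would then send an error back to the client instead
of calling tftp_open() at all.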

>> +
>> +if (access(nfilename, R_OK) == 0)
>> +tftp_open(client, nfilename);
>> +else
>> +tftp_open(client, filename);
>> +}

Here we look up the same file in both the client-specific subdirectory
and the default directory.  Should we instead look only in the
client-specific directory if the latter exists?

>>  
>>  return;
>>  
>> 
>
> thanks

-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE



Re: gas dwarf2 support

2017-10-20 Thread Mark Kettenis
> Date: Mon, 16 Oct 2017 00:26:10 +0200 (CEST)
> From: Mark Kettenis 
> 
> The diff below implements a few more CFI directives in order to be
> able to build clang on sparc64.
> 
> ok?

ping?

I'd like to get this in early in the cycle just in case this produces
some fallout in ports...

> Index: gnu/usr.bin/binutils-2.17/gas/dw2gencfi.c
> ===================================================================
> RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/gas/dw2gencfi.c,v
> retrieving revision 1.1.1.1
> diff -u -p -r1.1.1.1 dw2gencfi.c
> --- gnu/usr.bin/binutils-2.17/gas/dw2gencfi.c 24 Apr 2011 20:14:44 -  
> 1.1.1.1
> +++ gnu/usr.bin/binutils-2.17/gas/dw2gencfi.c 15 Oct 2017 22:22:49 -
> @@ -21,6 +21,7 @@
>  
>  #include "as.h"
>  #include "dw2gencfi.h"
> +#include "subsegs.h"
>  
>  
>  /* We re-use DWARF2_LINE_MIN_INSN_LENGTH for the code alignment field
> @@ -87,6 +88,10 @@ struct fde_entry
>symbolS *end_address;
>struct cfi_insn_data *data;
>struct cfi_insn_data **last;
> +  unsigned char per_encoding;
> +  unsigned char lsda_encoding;
> +  expressionS personality;
> +  expressionS lsda;
>unsigned int return_column;
>unsigned int signal_frame;
>  };
> @@ -97,15 +102,13 @@ struct cie_entry
>symbolS *start_address;
>unsigned int return_column;
>unsigned int signal_frame;
> +  unsigned char per_encoding;
> +  unsigned char lsda_encoding;
> +  expressionS personality;
>struct cfi_insn_data *first, *last;
>  };
>  
>  
> -/* Current open FDE entry.  */
> -static struct fde_entry *cur_fde_data;
> -static symbolS *last_address;
> -static offsetT cur_cfa_offset;
> -
>  /* List of FDE entries.  */
>  static struct fde_entry *all_fde_data;
>  static struct fde_entry **last_fde_data = &all_fde_data;
> @@ -120,7 +123,14 @@ struct cfa_save_data
>offsetT cfa_offset;
>  };
>  
> -static struct cfa_save_data *cfa_save_stack;
> +/* Current open FDE entry.  */
> +struct frch_cfi_data
> +{
> +  struct fde_entry *cur_fde_data;
> +  symbolS *last_address;
> +  offsetT cur_cfa_offset;
> +  struct cfa_save_data *cfa_save_stack;
> +};
>  
>  /* Construct a new FDE structure and add it to the end of the fde list.  */
>  
> @@ -129,12 +139,15 @@ alloc_fde_entry (void)
>  {
>struct fde_entry *fde = xcalloc (1, sizeof (struct fde_entry));
>  
> -  cur_fde_data = fde;
> +  frchain_now->frch_cfi_data = xcalloc (1, sizeof (struct frch_cfi_data));
> +  frchain_now->frch_cfi_data->cur_fde_data = fde;
>*last_fde_data = fde;
>  last_fde_data = &fde->next;
>  
>  fde->last = &fde->data;
>fde->return_column = DWARF2_DEFAULT_RETURN_COLUMN;
> +  fde->per_encoding = DW_EH_PE_omit;
> +  fde->lsda_encoding = DW_EH_PE_omit;
>  
>return fde;
>  }
> @@ -149,6 +162,7 @@ static struct cfi_insn_data *
>  alloc_cfi_insn_data (void)
>  {
>struct cfi_insn_data *insn = xcalloc (1, sizeof (struct cfi_insn_data));
> +  struct fde_entry *cur_fde_data = frchain_now->frch_cfi_data->cur_fde_data;
>  
>*cur_fde_data->last = insn;
>  cur_fde_data->last = &insn->next;
> @@ -163,7 +177,7 @@ cfi_new_fde (symbolS *label)
>  {
>struct fde_entry *fde = alloc_fde_entry ();
>fde->start_address = label;
> -  last_address = label;
> +  frchain_now->frch_cfi_data->last_address = label;
>  }
>  
>  /* End the currently open FDE.  */
> @@ -171,8 +185,9 @@ cfi_new_fde (symbolS *label)
>  void 
>  cfi_end_fde (symbolS *label)
>  {
> -  cur_fde_data->end_address = label;
> -  cur_fde_data = NULL;
> +  frchain_now->frch_cfi_data->cur_fde_data->end_address = label;
> +  free (frchain_now->frch_cfi_data);
> +  frchain_now->frch_cfi_data = NULL;
>  }
>  
>  /* Set the return column for the current FDE.  */
> @@ -180,7 +195,7 @@ cfi_end_fde (symbolS *label)
>  void
>  cfi_set_return_column (unsigned regno)
>  {
> -  cur_fde_data->return_column = regno;
> +  frchain_now->frch_cfi_data->cur_fde_data->return_column = regno;
>  }
>  
>  /* Universal functions to store new instructions.  */
> @@ -239,10 +254,10 @@ cfi_add_advance_loc (symbolS *label)
>struct cfi_insn_data *insn = alloc_cfi_insn_data ();
>  
>insn->insn = DW_CFA_advance_loc;
> -  insn->u.ll.lab1 = last_address;
> +  insn->u.ll.lab1 = frchain_now->frch_cfi_data->last_address;
>insn->u.ll.lab2 = label;
>  
> -  last_address = label;
> +  frchain_now->frch_cfi_data->last_address = label;
>  }
>  
>  /* Add a DW_CFA_offset record to the CFI data.  */
> @@ -252,6 +267,7 @@ cfi_add_CFA_offset (unsigned regno, offs
>  {
>unsigned int abs_data_align;
>  
> +  assert (DWARF2_CIE_DATA_ALIGNMENT != 0);
>cfi_add_CFA_insn_reg_offset (DW_CFA_offset, regno, offset);
>  
>abs_data_align = (DWARF2_CIE_DATA_ALIGNMENT < 0
> @@ -266,7 +282,7 @@ void
>  cfi_add_CFA_def_cfa (unsigned regno, offsetT offset)
>  {
>cfi_add_CFA_insn_reg_offset (DW_CFA_def_cfa, regno, offset);
> -  cur_cfa_offset = offset;
> +  frchain_now->frch_cfi_data->cur_cfa_offset = offset;
>  }
>  
>  /* Add a DW_CFA_register record to the CFI data.  

Re: proctreelk

2017-10-20 Thread Martin Pieuchot
On 14/10/17(Sat) 22:07, Philip Guenther wrote:
> 
> The diff below adds proctreelk, an rwlock protecting the links of the 
> process tree and related bits, as well as uidinfolk, an rwlock protecting 
> the uidinfo hash table.
> 
> Parts of this are based on FreeBSD's proctree_lock, particularly the 
> reorganization of enterpgrp() into enternewpgrp() and enterthispgrp() and 
> the splitting out of killjobc() from exit1().
> 
> This diff should address the previously reported crashes seen when using 
> ktrace(2) while creating/exiting processes.
> 
> This has been stable for quite a while under my usage; please test and 
> report any issues.

First of all, I'm very happy to see this diff.  Thanks Philip.

I have been running this diff on my amd64 NFS client/server build
machine since you posted it.  So far no issue, so it is stable for
this usage as well.

I'd however appreciate it if you could commit the killjobc() and
enter*grp() refactoring first, because in case of a revert this
would be less painful.

It's also for this reason that I introduced a macro for the
NET_LOCK(): so I could enable/disable it without having to revert
N files.  No idea if this could be useful here too.

I like the way you annotate the protected elements in the structure.
I'll try to do the same for bpf.

I'm also suggesting you commit the `uidinfolk' bits first.  This seems
quite safe.  In this exact part, what about introducing a uid_release(),
a wrapper around rw_exit_write(), to be called after uid_find()?
This way you can keep the lock local.
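
In other words something like the sketch below, here as a userland mock
with a pthread rwlock standing in for the kernel's uidinfolk (the names
come from this mail, not from the actual diff); the point is only that
the lock never has to leave the file that defines uid_find():

	#include <pthread.h>
	#include <sys/types.h>

	struct uidinfo {
		uid_t	ui_uid;
		long	ui_proccnt;
	};

	static pthread_rwlock_t uidinfolk = PTHREAD_RWLOCK_INITIALIZER;
	static struct uidinfo ui_stub;	/* stand-in for the hash table */

	struct uidinfo *
	uid_find(uid_t uid)
	{
		pthread_rwlock_wrlock(&uidinfolk);
		ui_stub.ui_uid = uid;	/* stand-in for lookup/insert */
		return (&ui_stub);	/* returned with the lock held */
	}

	void
	uid_release(struct uidinfo *uip)
	{
		(void)uip;
		pthread_rwlock_unlock(&uidinfolk);
	}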

I have other questions, but I'll keep them for the upcoming smaller
diffs :)

> Index: sys/proc.h
> ===================================================================
> RCS file: /data/src/openbsd/src/sys/sys/proc.h,v
> retrieving revision 1.240
> diff -u -p -r1.240 proc.h
> --- sys/proc.h29 Aug 2017 02:51:27 -  1.240
> +++ sys/proc.h1 Sep 2017 05:48:15 -
> @@ -44,6 +44,7 @@
>  #include <sys/selinfo.h>	/* For struct selinfo */
>  #include <sys/syslimits.h>	/* For LOGIN_NAME_MAX */
>  #include <sys/queue.h>
> +#include <sys/rwlock.h>	/* For struct rwlock */
>  #include <sys/timeout.h>	/* For struct timeout */
>  #include <sys/event.h>	/* For struct klist */
>  #include <sys/mutex.h>	/* For struct mutex */
> @@ -55,16 +56,25 @@
>  #endif
>  
>  /*
> + * Locks used to protect struct members in this file:
> + *   I   immutable after creation
> + *   t   proctreelk
> + *
> + * If multiple locks are listed then all are required for writes,
> + * but any one of them is sufficient for reads.
> + */
> +
> +/*
>   * One structure allocated per session.
>   */
>  struct process;
>  struct   session {
>   int s_count;/* Ref cnt; pgrps in session. */
> - struct  process *s_leader;  /* Session leader. */
> + struct  process *s_leader;  /* [t] Session leader. */
>   struct  vnode *s_ttyvp; /* Vnode of controlling terminal. */
> - struct  tty *s_ttyp;/* Controlling terminal. */
> - char	s_login[LOGIN_NAME_MAX];	/* Setlogin() name. */
> - pid_t   s_verauthppid;
> + struct  tty *s_ttyp;/* [t] Controlling terminal. */
> + char	s_login[LOGIN_NAME_MAX];	/* setlogin() name. */
> + pid_t   s_verauthppid;  /* TIOCSETVERAUTH info */
>   uid_t   s_verauthuid;
>   struct timeout s_verauthto;
>  };
> @@ -75,10 +85,10 @@ void zapverauth(/* struct session */ voi
>   * One structure allocated per process group.
>   */
>  struct   pgrp {
> - LIST_ENTRY(pgrp) pg_hash;   /* Hash chain. */
> - LIST_HEAD(, process) pg_members;/* Pointer to pgrp members. */
> - struct  session *pg_session;/* Pointer to session. */
> - pid_t   pg_id;  /* Pgrp id. */
> + LIST_ENTRY(pgrp) pg_hash;   /* [t] Hash chain. */
> + LIST_HEAD(, process) pg_members;/* [t] Pointer to pgrp members. */
> + struct  session *pg_session;/* [I] Pointer to session. */
> + pid_t   pg_id;  /* [I] Pgrp id. */
>   int pg_jobc;/* # procs qualifying pgrp for job control */
>  };
>  
> @@ -156,17 +166,17 @@ struct process {
>   LIST_ENTRY(process) ps_list;/* List of all processes. */
>   TAILQ_HEAD(,proc) ps_threads;   /* Threads in this process. */
>  
> - LIST_ENTRY(process) ps_pglist;  /* List of processes in pgrp. */
> - struct  process *ps_pptr;   /* Pointer to parent process. */
> - LIST_ENTRY(process) ps_sibling; /* List of sibling processes. */
> - LIST_HEAD(, process) ps_children;/* Pointer to list of children. */
> + LIST_ENTRY(process) ps_pglist;  /* [t] List of processes in pgrp. */
> + struct  process *ps_pptr;   /* [t] Pointer to parent process. */
> + LIST_ENTRY(process) ps_sibling; /* [t] List of sibling processes. */
> + LIST_HEAD(, process) ps_children;/* [t] Pointer to list of children. */
>   LIST_ENTRY(process) ps_hash;/* 

Re: Please test: IPsec w/o KERNEL_LOCK()

2017-10-20 Thread Klemens Nanni
On Mon, Oct 16, 2017 at 12:47:06PM +0200, Martin Pieuchot wrote:
> On 11/10/17(Wed) 17:01, Martin Pieuchot wrote:
> > OpenBSD 6.2 includes nice performance and latency improvements due to
> > the work done in the Network Stack in the previous years.  However as
> > soon as IPsec is enabled, all network-related processing is affected.
> > In other words you cannot profit from the latest MP work in the Network
> > Stack if you use IPsec.
> > 
> > During the last 6 months I hoped that somebody else would look at the
> > IPsec stack and tell us what needs to be done.  This didn't happen.
> > 
> > Now that 6.2 is released, we cannot afford to continue to parallelize
> > the Network Stack if some of our users and testers still run it under
> > KERNEL_LOCK(). 
> > 
> > So I did an audit of the IPsec stack and came up with the small diff
> > below.  This diff doesn't remove the KERNEL_LOCK() (yet), but adds some
> > asserts to make sure that the global data structures are all accessed
> > while holding the NET_LOCK().
> 
> Here's the diff to stop grabbing the KERNEL_LOCK(), please test and
> report back.
> 
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.517
> diff -u -p -r1.517 if.c
> --- net/if.c  16 Oct 2017 08:19:15 -  1.517
> +++ net/if.c  16 Oct 2017 10:37:37 -
> @@ -871,9 +871,6 @@ if_input_process(void *xifidx)
>   struct ifih *ifih;
>   struct srp_ref sr;
>   int s;
> -#ifdef IPSEC
> - int locked = 0;
> -#endif /* IPSEC */
>  
>   ifp = if_get(ifidx);
>   if (ifp == NULL)
> @@ -900,22 +897,6 @@ if_input_process(void *xifidx)
>*/
>   NET_LOCK();
>   s = splnet();
> -
> -#ifdef IPSEC
> - /*
> -  * IPsec is not ready to run without KERNEL_LOCK().  So all
> -  * the traffic on your machine is punished if you have IPsec
> -  * enabled.
> -  */
> - extern int ipsec_in_use;
> - if (ipsec_in_use) {
> - NET_UNLOCK();
> - KERNEL_LOCK();
> - NET_LOCK();
> - locked = 1;
> - }
> -#endif /* IPSEC */
> -
>   while ((m = ml_dequeue(&ml)) != NULL) {
>   /*
>* Pass this mbuf to all input handlers of its
> @@ -932,11 +913,6 @@ if_input_process(void *xifidx)
>   }
>   splx(s);
>   NET_UNLOCK();
> -
> -#ifdef IPSEC
> - if (locked)
> - KERNEL_UNLOCK();
> -#endif /* IPSEC */
>  out:
>   if_put(ifp);
>  }
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.323
> diff -u -p -r1.323 ip_input.c
> --- netinet/ip_input.c9 Oct 2017 08:35:38 -   1.323
> +++ netinet/ip_input.c16 Oct 2017 10:41:09 -
> @@ -482,8 +482,6 @@ ip_input_if(struct mbuf **mp, int *offp,
>   if (ipsec_in_use) {
>   int rv;
>  
> - KERNEL_ASSERT_LOCKED();
> -
>   rv = ipsec_forward_check(m, hlen, AF_INET);
>   if (rv != 0) {
>   ipstat_inc(ips_cantforward);
> @@ -1825,40 +1823,16 @@ ip_send_dispatch(void *xmq)
>   struct mbuf_queue *mq = xmq;
>   struct mbuf *m;
>   struct mbuf_list ml;
> -#ifdef IPSEC
> - int locked = 0;
> -#endif /* IPSEC */
>  
>   mq_delist(mq, &ml);
>   if (ml_empty(&ml))
>   return;
>  
>   NET_LOCK();
> -
> -#ifdef IPSEC
> - /*
> -  * IPsec is not ready to run without KERNEL_LOCK().  So all
> -  * the traffic on your machine is punished if you have IPsec
> -  * enabled.
> -  */
> - extern int ipsec_in_use;
> - if (ipsec_in_use) {
> - NET_UNLOCK();
> - KERNEL_LOCK();
> - NET_LOCK();
> - locked = 1;
> - }
> -#endif /* IPSEC */
> -
>   while ((m = ml_dequeue(&ml)) != NULL) {
>   ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
>   }
>   NET_UNLOCK();
> -
> -#ifdef IPSEC
> - if (locked)
> - KERNEL_UNLOCK();
> -#endif /* IPSEC */
>  }
>  
>  void
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_output.c,v
> retrieving revision 1.342
> diff -u -p -r1.342 ip_output.c
> --- netinet/ip_output.c   20 Sep 2017 16:22:02 -  1.342
> +++ netinet/ip_output.c   16 Oct 2017 10:41:22 -
> @@ -233,7 +233,6 @@ reroute:
>  
>  #ifdef IPSEC
>   if (ipsec_in_use || inp != NULL) {
> - KERNEL_ASSERT_LOCKED();
>   /* Do we have any pending SAs to apply ? */
>   tdb = ip_output_ipsec_lookup(m, hlen, &error, inp,
>   ipsecflowinfo);
> @@ -404,7 +403,6 @@ sendit:
>* Check if the packet needs encapsulation.
>*/
>   if (tdb != NULL) {
> - KERNEL_ASSERT_LOCKED();
>   /* Callee frees mbuf */
>   error = ip_output_ipsec_send(tdb, m,