Re: arm64 setjmp/longjmp signal mask storage error

2017-03-06 Thread Patrick Wildt
On Tue, Mar 07, 2017 at 01:44:19AM -0500, Dale Rahn wrote:
> Setjmp, longjmp was converted from calling sigprocmask to invoking
> sigprocmask directly. The ABI for the function call and the syscall
> are not the same and the register manipulation code was not updated 
> in the change.
> 
> This diff moves the jmpbuf to x2 for the duration of the sigprocmask syscall
> and loads x0/x1 with the appropriate values and saves the returned x0
> as the signal mask.
> 
> Other than storing x0 and x30 (lr) on the stack, this should be equivalent
> to calling sigprocmask 'bl sigprocmaskB instead of 'SYSTRAP(sigprocmask)'
> 
> diff --git a/lib/libc/arch/aarch64/gen/setjmp.S 
> b/lib/libc/arch/aarch64/gen/setjmp.S
> index ba4010be7ff..76c1be5b9b5 100644
> --- a/lib/libc/arch/aarch64/gen/setjmp.S
> +++ b/lib/libc/arch/aarch64/gen/setjmp.S
> @@ -34,16 +34,15 @@
>  #include 
>  
>  ENTRY(setjmp)
> - stp x0, x30, [sp, #-16]!
>  
> + mov x2, x0  /* save jmpbuf in x2 */
>   /* Store the signal mask */
> - add x2, x0, #(_JB_SIGMASK * 8)  /* oset */
> - mov x1, #0  /* set */
> + mov w1, #0  /* set */
>   mov x0, #1  /* SIG_BLOCK */
>   SYSTRAP(sigprocmask)
> + str w0, [x2, #(_JB_SIGMASK * 8)]/* oset */
>  
> - ldp x0, x30, [sp], #16
> -
> + mov x0, x2
>   /* Store the magic value and stack pointer */
>   ldr x8, .Lmagic
>   mov x9, sp
> @@ -73,18 +72,15 @@ ENTRY(setjmp)
>  END_STRONG(setjmp)
>  
>  ENTRY(longjmp)
> - stp x0, x1, [sp, #-32]!
> - str x30, [sp, #24]
> + mov x2, x0  /* move jmpbuf */
> + mov x3, x1  /* final return value */
>  
>   /* Restore the signal mask */
> - mov x2, #0  /* oset */
> - add x1, x0, #(_JB_SIGMASK * 8)  /* set */
> + ldr x1, [x2, #(_JB_SIGMASK * 8)]/* set */

Shouldn't this be w1 instead of x1, since you are storing it using w0?

>   mov x0, #3  /* SIG_SETMASK */
>   SYSTRAP(sigprocmask)
>  
> - ldr x30, [sp, #24]
> - ldp x0, x1, [sp], #32
> -
> + mov x0, x2
>   /* Check the magic value */
>   ldr x8, [x0], #8
>   ldr x9, .Lmagic
> @@ -110,7 +106,7 @@ ENTRY(longjmp)
>   ldp d14, d15, [x0]
>  
>   /* Load the return value */
> - mov x0, x1
> + mov x0, x3
>   ret
>  
>  botch:
> Dale Rahn dr...@dalerahn.com
> 



Re: arm64 SMP support, diff #3/5

2017-03-06 Thread Dale Rahn
On Thu, Feb 23, 2017 at 01:03:57PM +1100, Jonathan Gray wrote:
> On Wed, Feb 22, 2017 at 06:45:52PM -0500, Dale Rahn wrote:
> > Add psci 2.0 features to spin up/down/suspend processors.
> > 
> > This change uses extern weak symbols to determine if the platform supports
> > the cpu_on/cpu_off/cpu_suspend features. If available, it saves the fdt
> > provided argument values.
> > 
> > Some PSCI calls will return an int status, so the callfn has been updated
> > to return that status (eg cpu_on fail)
> > 
> > diff --git a/sys/dev/fdt/psci.c b/sys/dev/fdt/psci.c
> > index fceafd0e9ba..3177ff06beb 100644
> > --- a/sys/dev/fdt/psci.c
> > +++ b/sys/dev/fdt/psci.c
> > @@ -29,12 +29,19 @@
> >  extern void (*cpuresetfn)(void);
> >  extern void (*powerdownfn)(void);
> >  
> > +extern int (*cpu_suspend_fn)(void) __attribute__((weak)) ;
> > +extern int (*cpu_off_fn)(void) __attribute__((weak)) ;
> > +extern int (*cpu_on_fn)(uint64_t, uint64_t) __attribute__((weak)) ;
> > +
> >  #define SYSTEM_OFF 0x8408
> >  #define SYSTEM_RESET   0x8409
> >  
> >  struct psci_softc {
> > struct devicesc_dev;
> > -   void (*callfn)(uint32_t, uint32_t, uint32_t, 
> > uint32_t);
> > +   int  (*callfn)(uint32_t, uint32_t, uint32_t, 
> > uint32_t);
> > +   int sc_cpu_on;
> > +   int sc_cpu_off;
> > +   int sc_cpu_suspend;
> >  };
> 
> spacing is out here
> 
> >  
> >  struct psci_softc *psci_sc;
> > @@ -43,9 +50,12 @@ int  psci_match(struct device *, void *, void *);
> >  void   psci_attach(struct device *, struct device *, void *);
> >  void   psci_reset(void);
> >  void   psci_powerdown(void);
> > +intpsci_cpu_suspend(void);
> > +intpsci_cpu_off(void);
> > +intpsci_cpu_on(uint64_t, uint64_t);
> >  
> > -extern void hvc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> > -extern void smc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> > +extern int hvc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> > +extern int smc_call(uint32_t, uint32_t, uint32_t, uint32_t);
> >  
> >  struct cfattach psci_ca = {
> > sizeof(struct psci_softc), psci_match, psci_attach
> > @@ -83,6 +93,24 @@ psci_attach(struct device *parent, struct device *self, 
> > void *aux)
> > psci_sc = sc;
> > cpuresetfn = psci_reset;
> > powerdownfn = psci_powerdown;
> > +
> > +   if (_suspend_fn != NULL) {
> > +   sc->sc_cpu_suspend = OF_getpropint(faa->fa_node, "cpu_suspend", 
> > 0);
> > +   if (sc->sc_cpu_suspend != 0)
> > +   cpu_suspend_fn = psci_cpu_suspend;
> > +   }
> > +
> > +   if (_on_fn != NULL) {
> > +   sc->sc_cpu_on = OF_getpropint(faa->fa_node, "cpu_on", 0);
> > +   if (sc->sc_cpu_on != 0)
> > +   cpu_on_fn = psci_cpu_on;
> > +   }
> > +
> > +   if (_off_fn != NULL) {
> > +   sc->sc_cpu_off = OF_getpropint(faa->fa_node, "cpu_off", 0);
> > +   if (sc->sc_cpu_off != 0)
> > +   cpu_off_fn = psci_cpu_off;
> > +   }
> >  }
> 
> Can these nodes be relied on as being present?
> 
> Outside of qemu -M virt
> 
> psci {
> migrate = <0x8405>;
> cpu_on = <0x8403>;
> cpu_off = <0x8402>;
> cpu_suspend = <0x8401>;
> method = "hvc";
> compatible = "arm,psci-0.2", "arm,psci";
> };
> 
> I can only find
> 
> arm/boot/dts/artpec6.dtsi:  cpu_on = <0x8403>;
> arm/boot/dts/ecx-common.dtsi:   cpu_on  = <0x8406>;
> arm/boot/dts/keystone.dtsi: cpu_on  = <0x8403>;
> arm/boot/dts/xenvm-4.2.dts: cpu_on  = <2>;
> arm64/boot/dts/al/alpine-v2.dtsi:   cpu_on = <0x8403>;
> arm64/boot/dts/exynos/exynos5433.dtsi:  cpu_on = <0xC403>;
> arm64/boot/dts/lg/lg1312.dtsi:  cpu_on = <0x8403>;
> arm64/boot/dts/lg/lg1313.dtsi:  cpu_on = <0x8403>;
> arm64/boot/dts/mediatek/mt8173.dtsi:cpu_on= <0x8403>;
> arm64/boot/dts/sprd/sc9836.dtsi:cpu_on  = 
> <0xc403>;
> 
> 0x8403 is SMC32, 0xC403 is SMC64 and there is actually
> a different calling convention for these...
> 
> The existing SYSTEM_OFF and SYSTEM_RESET uses are fine as they are just
> a 32 bit function id with no arguments.  But these new functions
> need to care about the arguments.
> 
> There is also PSCI_FEATURES in PSCI 1.0 that tests if function ids are
> supported.
> 

Updated diff, this has changed a bit since the psci driver has changed.

diff --git a/sys/dev/fdt/psci.c b/sys/dev/fdt/psci.c
index b24613a275c..2ba500ea718 100644
--- a/sys/dev/fdt/psci.c
+++ b/sys/dev/fdt/psci.c
@@ -29,14 +29,18 @@
 extern void (*cpuresetfn)(void);
 extern void (*powerdownfn)(void);
 
+extern int (*cpu_on_fn)(uint64_t, uint64_t) __attribute__((weak)) ;
+
 #define SYSTEM_OFF 0x8408
 #define SYSTEM_RESET   0x8409
+#define SYSTEM_CPU_ON640xC403
 
 struct psci_softc {
 

Re: ntp_update_second - removed unused var

2017-03-06 Thread Jeremie Courreges-Anglas
David Hill  writes:

> Hello -
>
> time_t *sec is unused in ntp_update_second.
> OK?

ok jca@

> Index: kern/kern_tc.c
> ===
> RCS file: /cvs/src/sys/kern/kern_tc.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 kern_tc.c
> --- kern/kern_tc.c9 Feb 2017 20:15:28 -   1.30
> +++ kern/kern_tc.c3 Mar 2017 05:37:17 -
> @@ -41,7 +41,7 @@
>  
>  u_int dummy_get_timecount(struct timecounter *);
>  
> -void ntp_update_second(int64_t *, time_t *);
> +void ntp_update_second(int64_t *);
>  int sysctl_tc_hardware(void *, size_t *, void *, size_t);
>  int sysctl_tc_choice(void *, size_t *, void *, size_t);
>  
> @@ -434,7 +434,7 @@ tc_windup(void)
>   if (i > LARGE_STEP)
>   i = 2;
>   for (; i > 0; i--)
> - ntp_update_second(>th_adjustment, );
> + ntp_update_second(>th_adjustment);
>  
>   /* Update the UTC timestamps used by the get*() functions. */
>   /* XXX shouldn't do this here.  Should force non-`get' versions. */
> @@ -617,7 +617,7 @@ sysctl_tc(int *name, u_int namelen, void
>  }
>  
>  void
> -ntp_update_second(int64_t *adjust, time_t *sec)
> +ntp_update_second(int64_t *adjust)
>  {
>   int64_t adj;
>  
>

-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE



Re: in_pcballoc IPv6 hash

2017-03-06 Thread Jeremie Courreges-Anglas
Alexander Bluhm  writes:

> Hi,
>
> Initially all inpcb, including the IPv6 ones, are hooked into the
> IPv4 hash.  They cannot be used before bind(2) and then they are
> rehashed and rehooked, so this is not noticed.  Nevertheless I think
> this is a bug and they should go into the IPv6 hash from the
> beginning.
>
> ok?

ok jca@

> bluhm
>
> Index: netinet/in_pcb.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.218
> diff -u -p -r1.218 in_pcb.c
> --- netinet/in_pcb.c  6 Mar 2017 08:59:07 -   1.218
> +++ netinet/in_pcb.c  6 Mar 2017 23:08:43 -
> @@ -276,8 +276,16 @@ in_pcballoc(struct socket *so, struct in
>   TAILQ_INSERT_HEAD(>inpt_queue, inp, inp_queue);
>   head = INPCBLHASH(table, inp->inp_lport, inp->inp_rtableid);
>   LIST_INSERT_HEAD(head, inp, inp_lhash);
> - head = INPCBHASH(table, >inp_faddr, inp->inp_fport,
> - >inp_laddr, inp->inp_lport, rtable_l2(inp->inp_rtableid));
> +#ifdef INET6
> + if (sotopf(so) == PF_INET6)
> + head = IN6PCBHASH(table, >inp_faddr6, inp->inp_fport,
> + >inp_laddr6, inp->inp_lport,
> + rtable_l2(inp->inp_rtableid));
> + else
> +#endif /* INET6 */
> + head = INPCBHASH(table, >inp_faddr, inp->inp_fport,
> + >inp_laddr, inp->inp_lport,
> + rtable_l2(inp->inp_rtableid));
>   LIST_INSERT_HEAD(head, inp, inp_hash);
>   splx(s);
>   so->so_pcb = inp;
>

-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE



arm64 setjmp/longjmp signal mask storage error

2017-03-06 Thread Dale Rahn
Setjmp, longjmp was converted from calling sigprocmask to invoking
sigprocmask directly. The ABI for the function call and the syscall
are not the same and the register manipulation code was not updated 
in the change.

This diff moves the jmpbuf to x2 for the duration of the sigprocmask syscall
and loads x0/x1 with the appropriate values and saves the returned x0
as the signal mask.

Other than storing x0 and x30 (lr) on the stack, this should be equivalent
to calling sigprocmask 'bl sigprocmaskB instead of 'SYSTRAP(sigprocmask)'

diff --git a/lib/libc/arch/aarch64/gen/setjmp.S 
b/lib/libc/arch/aarch64/gen/setjmp.S
index ba4010be7ff..76c1be5b9b5 100644
--- a/lib/libc/arch/aarch64/gen/setjmp.S
+++ b/lib/libc/arch/aarch64/gen/setjmp.S
@@ -34,16 +34,15 @@
 #include 
 
 ENTRY(setjmp)
-   stp x0, x30, [sp, #-16]!
 
+   mov x2, x0  /* save jmpbuf in x2 */
/* Store the signal mask */
-   add x2, x0, #(_JB_SIGMASK * 8)  /* oset */
-   mov x1, #0  /* set */
+   mov w1, #0  /* set */
mov x0, #1  /* SIG_BLOCK */
SYSTRAP(sigprocmask)
+   str w0, [x2, #(_JB_SIGMASK * 8)]/* oset */
 
-   ldp x0, x30, [sp], #16
-
+   mov x0, x2
/* Store the magic value and stack pointer */
ldr x8, .Lmagic
mov x9, sp
@@ -73,18 +72,15 @@ ENTRY(setjmp)
 END_STRONG(setjmp)
 
 ENTRY(longjmp)
-   stp x0, x1, [sp, #-32]!
-   str x30, [sp, #24]
+   mov x2, x0  /* move jmpbuf */
+   mov x3, x1  /* final return value */
 
/* Restore the signal mask */
-   mov x2, #0  /* oset */
-   add x1, x0, #(_JB_SIGMASK * 8)  /* set */
+   ldr x1, [x2, #(_JB_SIGMASK * 8)]/* set */
mov x0, #3  /* SIG_SETMASK */
SYSTRAP(sigprocmask)
 
-   ldr x30, [sp, #24]
-   ldp x0, x1, [sp], #32
-
+   mov x0, x2
/* Check the magic value */
ldr x8, [x0], #8
ldr x9, .Lmagic
@@ -110,7 +106,7 @@ ENTRY(longjmp)
ldp d14, d15, [x0]
 
/* Load the return value */
-   mov x0, x1
+   mov x0, x3
ret
 
 botch:
Dale Rahn   dr...@dalerahn.com



Re: 61.html - document resolver(3) support for EDNS0 and DNSSEC

2017-03-06 Thread Jeremie Courreges-Anglas

I have committed a similar diff, thanks.

-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE



Re: 11n support for athn(4)

2017-03-06 Thread Timo Myyrä
Stefan Sperling  writes:

> On Mon, Mar 06, 2017 at 07:36:21AM +0200, Timo Myyrä wrote:
>
>> Did some tcpbench testing and got following results:
>> Each test run with: tcpbench -s || tcpbench -t 15  commands.
>> Host AP: apu 2b4 with athn, client = thinkpad t430s with iwn (OpenBSD)
>> 
>> channel 9 running old snapshot etc:
>> 11n client -> server ~4, server -> ~0,
>> 11g client -> server ~16, server -> client ~0-6mbs
>> ---
>> updated to new snapshot: OpenBSD 6.0-current (GENERIC.MP) #206: Sat Mar  4 
>> 09:22:00 MST 2017
>> = added another antenna, moved the ap to better spot, switched old ap off, 
>> changed channel to 11
>> 11n client -> server: ~6mbps, server -> client ~0.2mbps
>> 11g client -> server: ~7mbps, server -> client ~3mbps
>> 
>> ---
>> switch to channel 3:
>> 11n client -> server: ~7mbps, server -> client: ~0-5mbps
>> 11g client -> server: 16mbps, server -> client: ~5mbps
>> 
>> ---
>> applied dyn rts patch:
>> 11n client -> server: 4-7mbps, server -> client 0.2-5.5mbps
>> 11g client -> server: ~4mbps, server -> client: ~5.5mbps
>
> What made 11n go down from 16 to 4?
> 11g is not affected by this patch so something else affected 11g.
> Could it be other networks on overlapping channels?

Yeah, I have around ten wireless networks around. Some seem to be mobile AP
which come and go. 

>  
> To tell whether the patch has any effect in your case I would like to
> know which HT protection setting your AP is using.
>
> Find a snapshot dated a bit after 2017/03/04 10:51:20 MST, or make sure 
> tcpdump
> sources are -current and rebuild and install tcpdump. Associate to the AP,
> and run: tcpdump -n -i iwn0 -y IEEE802_11_RADIO -s 4096 -v -l | grep beacon
> and in htop=<...> look for 'htprot'. If it shows 'htprot none' then dynamic
> RTS is used in 11n mode (i.e. my patch will switch RTS on/off as needed).
> Otherwise, you have some pre-11n devices in your environment so RTS must
> always be enabled and my patch makes no difference.

06:59:51.809091 802.11 flags=0<>: beacon, 
caps=2061, ssid (wifee), rates 1M* 
2M* 5M* 11M* 6M 9M 12M 18M, ds (chan 6), tim 0x0001, xrates 24M 36M 48M 
54M, rsn 0x010fac04010fac04010fac02, htcaps=<20MHz,A-MSDU 
3839,A-MPDU max 8191,RxMCS 0x>, htop=<20MHz chan 6,STA 
chanw 20MHz,htprot non-HT-mixed,basic MCS set 0x>, vendor 
0x0050f202010103a427a442435e0062322f00, 

So I guess its pre-11n devices somewhere ruining my day. 

>
> Note that the iwn(4) driver does not yet support MIMO in 11n mode.
> Once it does, 11n should become faster than 11g. I have seen an iwm(4) client
> which supports MIMO max out at 21 Mbps tcpbench towards my athn(4) AP, on a
> 5GHz channel with 'htprot none'.
> Unfortunately, tcpbench in the other direction (athn -> iwm) peaks at 10 Mbps.
> There is plenty of room for improvement.
>

I didn't think it would improve things yet but I had the antenna so I'd figure
I'd stick it in the AP while I'm tweaking it anyway.

Speaking of 5Ghz, my AP uses athn chipset AR9280 which seems to support 2.4Ghz
and 5Ghz. Can I use 5Ghz with my AP to see which devices would break after such
transition. I guess I would need to get 5Ghz antenna and just stick that to my
AP?
Can OpenBSD AP work on both frequencies at the same time or is that something
not yet supported?

>> At least what pops up in the measurements is that 11g gives more stable
>> behaviour. 11n speed seems to vary a lot in that 15s test compared to 11g.
>
> This could be explained by differences in rate scaling algorithms.
> In -current 11g uses AMRR, and 11n mode uses MiRa which jumps around more.
> In 6.0 everything used AMRR so a comparing how a 6.0 iwn client performs
> in 11n mode would be useful (you could e.g. install 6.0 to a USB stick
> and boot from it for running a speed test).

I have only very limited observations, like typing commands via ssh has usually
lag with 11n and works pretty smoothly on 11g.

timo



Re: Preserve original pf flow classification

2017-03-06 Thread David Gwynne
> On 7 Mar 2017, at 05:27, Mike Belopuhov  wrote:
> 
> There's no need to overwrite original flow ID assigned on
> input so that it remains stable throughout the life of a
> packet.  The rationale is that output processing may split,
> encapsulate or obfuscate a single stream which makes the
> changed flow ID useless for purposes of flow control, for
> instance fair sharing of bandwidth, etc.
> 
> Is the diff below OK?
> 
> diff --git sys/net/pf.c sys/net/pf.c
> index f64fdc4c80d..f3a5717be4a 100644
> --- sys/net/pf.c
> +++ sys/net/pf.c
> @@ -6779,11 +6779,11 @@ done:
>   pd.m->m_pkthdr.pf.inp->inp_pf_sk =
>   pf_state_key_ref(s->key[PF_SK_STACK]);
>   s->key[PF_SK_STACK]->inp = pd.m->m_pkthdr.pf.inp;
>   }
> 
> - if (s) {
> + if (s && (pd.m->m_pkthdr.ph_flowid & M_FLOWID_VALID) == 0) {
>   pd.m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
>   (M_FLOWID_MASK & bemtoh64(>id));
>   }
> 
>   /*
> 

ok.



acpisbs/acpibat

2017-03-06 Thread joshua stein
In case the DSDT provides interfaces for both a Smart Battery and a
normal one, probe for acpisbs first, if it attaches, make acpibat
not match.


Index: dev/acpi/acpi.c
===
RCS file: /cvs/src/sys/dev/acpi/acpi.c,v
retrieving revision 1.323
diff -u -p -u -p -r1.323 acpi.c
--- dev/acpi/acpi.c 2 Mar 2017 10:38:10 -   1.323
+++ dev/acpi/acpi.c 7 Mar 2017 02:34:37 -
@@ -116,6 +116,7 @@ voidacpi_enable_wakegpes(struct acpi_so
 intacpi_foundec(struct aml_node *, void *);
 intacpi_foundsony(struct aml_node *node, void *arg);
 intacpi_foundhid(struct aml_node *, void *);
+intacpi_foundsbs(struct aml_node *node, void *);
 intacpi_add_device(struct aml_node *node, void *arg);
 
 void   acpi_thread(void *);
@@ -1082,6 +1083,9 @@ acpi_attach(struct device *parent, struc
 
aml_walknodes(_root, AML_WALK_PRE, acpi_add_device, sc);
 
+   /* try to find smart battery first */
+   aml_find_node(_root, "_HID", acpi_foundsbs, sc);
+
/* attach battery, power supply and button devices */
aml_find_node(_root, "_HID", acpi_foundhid, sc);
 
@@ -2911,6 +2915,45 @@ acpi_foundvideo(struct aml_node *node, v
 
return (0);
 }
+
+int
+acpi_foundsbs(struct aml_node *node, void *arg)
+{
+   struct acpi_softc   *sc = (struct acpi_softc *)arg;
+   struct device   *self = (struct device *)arg;
+   char cdev[32], dev[32];
+   struct acpi_attach_args  aaa;
+   int64_t  sta;
+
+   if (acpi_parsehid(node, arg, cdev, dev, sizeof(dev)) != 0)
+   return (0);
+
+   if (aml_evalinteger(sc, node->parent, "_STA", 0, NULL, ))
+   sta = STA_PRESENT | STA_ENABLED | STA_DEV_OK | 0x1000;
+
+   if ((sta & STA_PRESENT) == 0)
+   return (0);
+
+   acpi_attach_deps(sc, node->parent);
+
+   if (strcmp(dev, ACPI_DEV_SBS) != 0)
+   return (0);
+
+   if (node->parent->attached)
+   return (0);
+
+   memset(, 0, sizeof(aaa));
+   aaa.aaa_iot = sc->sc_iot;
+   aaa.aaa_memt = sc->sc_memt;
+   aaa.aaa_node = node->parent;
+   aaa.aaa_dev = dev;
+
+   config_found(self, , acpi_print);
+   node->parent->attached = 1;
+
+   return (0);
+}
+
 
 int
 acpiopen(dev_t dev, int flag, int mode, struct proc *p)
Index: dev/acpi/acpibat.c
===
RCS file: /cvs/src/sys/dev/acpi/acpibat.c,v
retrieving revision 1.62
diff -u -p -u -p -r1.62 acpibat.c
--- dev/acpi/acpibat.c  14 Mar 2015 03:38:46 -  1.62
+++ dev/acpi/acpibat.c  7 Mar 2017 02:34:37 -
@@ -54,6 +54,9 @@ acpibat_match(struct device *parent, voi
struct acpi_attach_args *aa = aux;
struct cfdata   *cf = match;
 
+   if (((struct acpi_softc *)parent)->sc_havesbs)
+   return (0);
+
/* sanity */
return (acpi_matchhids(aa, acpibat_hids, cf->cf_driver->cd_name));
 }
Index: dev/acpi/acpireg.h
===
RCS file: /cvs/src/sys/dev/acpi/acpireg.h,v
retrieving revision 1.38
diff -u -p -u -p -r1.38 acpireg.h
--- dev/acpi/acpireg.h  25 Feb 2017 20:09:20 -  1.38
+++ dev/acpi/acpireg.h  7 Mar 2017 02:34:37 -
@@ -748,10 +748,10 @@ struct acpi_ivrs {
 #define ACPI_DEV_MEMD  "PNP0C80"   /* Memory Device */
 #define ACPI_DEV_MOUSE "PNP0F13"   /* PS/2 Mouse */
 #define ACPI_DEV_SHC   "ACPI0001"  /* SMBus 1.0 Host Controller */
-#define ACPI_DEV_SMS1  "ACPI0002"  /* Smart Battery Subsystem */
+#define ACPI_DEV_SBS   "ACPI0002"  /* Smart Battery Subsystem */
 #define ACPI_DEV_AC"ACPI0003"  /* AC Device */
 #define ACPI_DEV_MD"ACPI0004"  /* Module Device */
-#define ACPI_DEV_SMS2  "ACPI0005"  /* SMBus 2.0 Host Controller */
+#define ACPI_DEV_SMBUS "ACPI0005"  /* SMBus 2.0 Host Controller */
 #define ACPI_DEV_GBD   "ACPI0006"  /* GPE Block Device */
 #define ACPI_DEV_PD"ACPI0007"  /* Processor Device */
 #define ACPI_DEV_ALSD  "ACPI0008"  /* Ambient Light Sensor Device */
Index: dev/acpi/acpisbs.c
===
RCS file: /cvs/src/sys/dev/acpi/acpisbs.c,v
retrieving revision 1.3
diff -u -p -u -p -r1.3 acpisbs.c
--- dev/acpi/acpisbs.c  7 Mar 2017 02:27:02 -   1.3
+++ dev/acpi/acpisbs.c  7 Mar 2017 02:34:37 -
@@ -144,7 +144,7 @@ struct cfdriver acpisbs_cd = {
 };
 
 const char *acpisbs_hids[] = {
-   "ACPI0002",
+   ACPI_DEV_SBS,
NULL
 };
 
@@ -211,6 +211,8 @@ acpisbs_attach(struct device *parent, st
 
aml_register_notify(sc->sc_devnode, aa->aaa_dev, acpisbs_notify,
sc, ACPIDEV_POLL);
+
+   sc->sc_acpi->sc_havesbs = 1;
 }
 
 void
Index: dev/acpi/acpivar.h
===
RCS file: /cvs/src/sys/dev/acpi/acpivar.h,v

Re: priq: convert to mbuf lists

2017-03-06 Thread David Gwynne

> On 2 Mar 2017, at 21:19, Mike Belopuhov  wrote:
> 
> On Thu, Mar 02, 2017 at 10:11 +0100, Martin Pieuchot wrote:
>> On 02/03/17(Thu) 01:16, Mike Belopuhov wrote:
>>> On 2 March 2017 at 00:56, David Gwynne  wrote:
 
> On 2 Mar 2017, at 06:43, Mike Belopuhov  wrote:
> 
> This convers hand rolled lists into exactly the same mbuf_lists.
> I need this because of the next diff that uses the ml_len packet
> counter that mbuf_lists have.  Otherwise there's no functional
> change.
 
 i didnt use mbuf lists here because they have an extra counter
 that isnt, or wasnt, needed.
 
 im not sure you need to know how long a list is in your later
 diff, you just need to know if it is not empty. you can do that
 by checking if the head is NULL.
 
>>> 
>>> true, i was thinking about clarifying this, but i like the
>>> "declarativeness" of the length check.  i don't think that an
>>> extra counter is a big deal.  also mbuf lists make this code look
>>> simpler which is a good thing, imo.
>> 
>> I agree.  I find the code much easier to understand with mikeb@'s diff.
>> 
>> IMHO we should avoid hand-rolled lists.
>> 
>> ok mpi@
>> 
> 
> Here's an updated diff with suggestions from bluhm@ and it's
> even slimmer now but I don't mind either way.  If David isn't
> comfortable with this change, we don't have to do it.

im really not keen, but i guess i will submit to populism if everyone else 
feels strongly about this.

> 
> ---
> sys/net/ifq.c | 50 ++
> 1 file changed, 18 insertions(+), 32 deletions(-)
> 
> diff --git sys/net/ifq.c sys/net/ifq.c
> index 40731637a2c..a601e9b9bd1 100644
> --- sys/net/ifq.c
> +++ sys/net/ifq.c
> @@ -51,17 +51,12 @@ const struct ifq_ops * const ifq_priq_ops = _ops;
> 
> /*
>  * priq internal structures
>  */
> 
> -struct priq_list {
> - struct mbuf *head;
> - struct mbuf *tail;
> -};
> -
> struct priq {
> - struct priq_list pq_lists[IFQ_NQUEUES];
> + struct mbuf_list pq_lists[IFQ_NQUEUES];
> };
> 
> /*
>  * ifqueue serialiser
>  */
> @@ -392,11 +387,17 @@ priq_idx(unsigned int nqueues, const struct mbuf *m)
> }
> 
> void *
> priq_alloc(unsigned int idx, void *null)
> {
> - return (malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK | M_ZERO));
> + struct priq *pq;
> + int i;
> +
> + pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
> + for (i = 0; i < IFQ_NQUEUES; i++)
> + ml_init(>pq_lists[i]);
> + return (pq);
> }
> 
> void
> priq_free(unsigned int idx, void *pq)
> {
> @@ -405,40 +406,35 @@ priq_free(unsigned int idx, void *pq)
> 
> int
> priq_enq(struct ifqueue *ifq, struct mbuf *m)
> {
>   struct priq *pq;
> - struct priq_list *pl;
> + struct mbuf_list *pl;
> 
>   if (ifq_len(ifq) >= ifq->ifq_maxlen)
>   return (ENOBUFS);
> 
>   pq = ifq->ifq_q;
>   KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);
>   pl = >pq_lists[m->m_pkthdr.pf.prio];
> 
> - m->m_nextpkt = NULL;
> - if (pl->tail == NULL)
> - pl->head = m;
> - else
> - pl->tail->m_nextpkt = m;
> - pl->tail = m;
> + ml_enqueue(pl, m);
> 
>   return (0);
> }
> 
> struct mbuf *
> priq_deq_begin(struct ifqueue *ifq, void **cookiep)
> {
>   struct priq *pq = ifq->ifq_q;
> - struct priq_list *pl;
> + struct mbuf_list *pl;
>   unsigned int prio = nitems(pq->pq_lists);
>   struct mbuf *m;
> 
>   do {
>   pl = >pq_lists[--prio];
> - m = pl->head;
> + m = MBUF_LIST_FIRST(pl);
>   if (m != NULL) {
>   *cookiep = pl;
>   return (m);
>   }
>   } while (prio > 0);
> @@ -447,35 +443,25 @@ priq_deq_begin(struct ifqueue *ifq, void **cookiep)
> }
> 
> void
> priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
> {
> - struct priq_list *pl = cookie;
> -
> - KASSERT(pl->head == m);
> + struct mbuf_list *pl = cookie;
> 
> - pl->head = m->m_nextpkt;
> - m->m_nextpkt = NULL;
> + KASSERT(MBUF_LIST_FIRST(pl) == m);
> 
> - if (pl->head == NULL)
> - pl->tail = NULL;
> + ml_dequeue(pl);
> }
> 
> void
> priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
> {
>   struct priq *pq = ifq->ifq_q;
> - struct priq_list *pl;
> + struct mbuf_list *pl;
>   unsigned int prio = nitems(pq->pq_lists);
> - struct mbuf *m, *n;
> + struct mbuf *m;
> 
>   do {
>   pl = >pq_lists[--prio];
> -
> - for (m = pl->head; m != NULL; m = n) {
> - n = m->m_nextpkt;
> - ml_enqueue(ml, m);
> - }
> -
> - pl->head = pl->tail = NULL;
> + ml_enlist(ml, pl);
>   } while (prio > 0);
> }
> -- 
> 2.12.0
> 



decryption failed or bad record mac (was: tlsv1 alert decrypt error)

2017-03-06 Thread Kirill Miazine

Thanks for moving the thread to the correct place!

* Bob Beck [2017-03-06 15:49]:

And as joel mentioned, a fix is already arriving for this - there was a bug
in SSLv2 compatible handshake initiation,


Joel sent me a patch which appeared here:

http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/lib/libssl/ssl_packet.c.diff?r1=1.4=1.5=date

And with this patch (I assume it's part of the most recent snapshot) the
error message got different

28 Feb snapshot: ACCEPT_SR_KEY_EXCH:tlsv1 alert decrypt error
Today's snapshot: ACCEPT_SR_CERT_VRFY:decryption failed or bad record mac

And as the error message is different now, I'm changing the subject to
get a new thread.

Either there a different fix which I'm still missing or this is
a completely new issue. In either case I'm happy to assist with
debugging this, I could even try to collect some packets.


and Paypal still has it enabled... (yeeuch)


On good side (for the project), I trigger new emails from PayPal by
sending some EUR to the foundation. I hope this won't delay the final
fix from coming! :)


On Mon, Mar 6, 2017 at 3:48 PM, Bob Beck  wrote:



Move it to tech@ from misc.. not libressl.. libressl is not special ;)

On Mon, Mar 6, 2017 at 3:21 PM, Kirill Miazine  wrote:


Moving to libressl@ from misc@, as it's a LibreSSL issue.

* Joel Sing [2017-03-05 23:01]:

On Thursday 02 March 2017 13:28:08 Kirill Miazine wrote:



Recently I've noticed a number of error messages in my Exim mail log:

TLS error on connection from mx1.slc.paypal.com (mx0.slc.paypal.com
)
[173.0.84.226] \ (SSL_accept): error:1403741B:SSL
routines:ACCEPT_SR_KEY_EXCH:tlsv1 alert decrypt error TLS client
disconnected cleanly (rejected our certificate?)



This is most likely the same issue as that reported on the libressl@
mailing
list a day or so ago - expect a fix to arrive shortly.



I rebuilt exim on latest snapshot (OpenBSD 6.1-beta (GENERIC.MP) #213:
Mon Mar  6 12:31:59 MST 2017) and the error looks different now:

TLS error on connection from mx0.phx.paypal.com [66.211.168.230] \
   (SSL_accept): error:14039119:SSL routines:ACCEPT_SR_CERT_VRFY:decryption
\
   failed or bad record mac


--
   -- Kirill Miazine 






--
   -- Kirill Miazine 



Re: tlsv1 alert decrypt error

2017-03-06 Thread Bob Beck
And as joel mentioned, a fix is already arriving for this - there was a bug
in SSLv2 compatible handshake initiation,
and Paypal still has it enabled... (yeeuch)

On Mon, Mar 6, 2017 at 3:48 PM, Bob Beck  wrote:

>
> Move it to tech@ from misc.. not libressl.. libressl is not special ;)
>
> On Mon, Mar 6, 2017 at 3:21 PM, Kirill Miazine  wrote:
>
>> Moving to libressl@ from misc@, as it's a LibreSSL issue.
>>
>> * Joel Sing [2017-03-05 23:01]:
>>
>> On Thursday 02 March 2017 13:28:08 Kirill Miazine wrote:
>>>
 Recently I've noticed a number of error messages in my Exim mail log:

 TLS error on connection from mx1.slc.paypal.com (mx0.slc.paypal.com
 )
 [173.0.84.226] \ (SSL_accept): error:1403741B:SSL
 routines:ACCEPT_SR_KEY_EXCH:tlsv1 alert decrypt error TLS client
 disconnected cleanly (rejected our certificate?)

>>>
>>> This is most likely the same issue as that reported on the libressl@
>>> mailing
>>> list a day or so ago - expect a fix to arrive shortly.
>>>
>>
>> I rebuilt exim on latest snapshot (OpenBSD 6.1-beta (GENERIC.MP) #213:
>> Mon Mar  6 12:31:59 MST 2017) and the error looks different now:
>>
>> TLS error on connection from mx0.phx.paypal.com [66.211.168.230] \
>>(SSL_accept): error:14039119:SSL routines:ACCEPT_SR_CERT_VRFY:decryption
>> \
>>failed or bad record mac
>>
>>
>> --
>>-- Kirill Miazine 
>>
>>
>


Re: tlsv1 alert decrypt error

2017-03-06 Thread Bob Beck
Move it to tech@ from misc.. not libressl.. libressl is not special ;)

On Mon, Mar 6, 2017 at 3:21 PM, Kirill Miazine  wrote:

> Moving to libressl@ from misc@, as it's a LibreSSL issue.
>
> * Joel Sing [2017-03-05 23:01]:
>
> On Thursday 02 March 2017 13:28:08 Kirill Miazine wrote:
>>
>>> Recently I've noticed a number of error messages in my Exim mail log:
>>>
>>> TLS error on connection from mx1.slc.paypal.com (mx0.slc.paypal.com)
>>> [173.0.84.226] \ (SSL_accept): error:1403741B:SSL
>>> routines:ACCEPT_SR_KEY_EXCH:tlsv1 alert decrypt error TLS client
>>> disconnected cleanly (rejected our certificate?)
>>>
>>
>> This is most likely the same issue as that reported on the libressl@
>> mailing
>> list a day or so ago - expect a fix to arrive shortly.
>>
>
> I rebuilt exim on latest snapshot (OpenBSD 6.1-beta (GENERIC.MP) #213:
> Mon Mar  6 12:31:59 MST 2017) and the error looks different now:
>
> TLS error on connection from mx0.phx.paypal.com [66.211.168.230] \
>(SSL_accept): error:14039119:SSL routines:ACCEPT_SR_CERT_VRFY:decryption
> \
>failed or bad record mac
>
>
> --
>-- Kirill Miazine 
>
>


synaptics(4): multitouch input and hysteresis

2017-03-06 Thread Ulf Brosziewski
While preparing and testing some changes in ubcmtp I found a bug in
wsconscomm that may affect two-finger touches on "noisy" MT devices, that
is, on devices that constantly report small deltas of the touch positions
even if there is no noticeable movement.

Whenever the contact count or the pointer-controlling touch changes,
wsmouse will generate a TOUCH_RESET event, which signals that there is a
coordinate update that may not correspond to a movement on the touchpad.
The wsconscomm handler ensures that in this case the driver doesn't
trigger pointer movement. If scrolling is active, it also adjusts the
scroll coordinates and the "move history" to the new position (this is
necessary because the driver computes its scroll deltas independently of
the motion deltas).

The hysteresis coordinates will need a similar adjustment. If it works
with stale coordinates, the filter may produce an output that differs
from its input by an amount that is equal to the hysteresis threshold.
If you have a "noisy" device and put two fingers on its surface without
moving them, pointer control can quickly switch back and forth, and the
deltas add up quickly to visible changes of the scroll position.

The good news is that the fix is much shorter than its explanation.

OK?


Index: src/wsconscomm.c
===
RCS file: /cvs/xenocara/driver/xf86-input-synaptics/src/wsconscomm.c,v
retrieving revision 1.16
diff -u -p -r1.16 wsconscomm.c
--- src/wsconscomm.c12 Sep 2016 22:12:44 -  1.16
+++ src/wsconscomm.c6 Mar 2017 18:37:41 -
@@ -252,6 +252,9 @@ WSConsReadHwState(InputInfoPtr pInfo,
 if (reset) {
 /* Ensure that pointer motion stops. */
 priv->count_packet_finger = 0;
+/* Don't use stale coordinates for filtering. */
+priv->hyst_center_x = hw->x;
+priv->hyst_center_y = hw->y;
 if (priv->vert_scroll_twofinger_on
 || priv->horiz_scroll_twofinger_on) {
 WSConsAdjustScrollCoords(priv, hw);



Re: {tc,ud}btable & NET_LOCK()

2017-03-06 Thread Alexander Bluhm
On Mon, Mar 06, 2017 at 12:13:48PM +0100, Martin Pieuchot wrote:
> In sysctl_file() splnet() is no longer what we want.  Arguably the
> KERNEL_LOCK() should be enough, but since pf(4) is messing with these
> tables, and nobody turned this part of pf(4) MP, let's require the
> NET_LOCK().
> 
> ok?

OK bluhm@

I think we should do the same for the other loops and insertions
of this queue.

ok?

bluhm

Index: netinet/in_pcb.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.218
diff -u -p -r1.218 in_pcb.c
--- netinet/in_pcb.c6 Mar 2017 08:59:07 -   1.218
+++ netinet/in_pcb.c6 Mar 2017 19:07:17 -
@@ -249,10 +249,9 @@ int
 in_pcballoc(struct socket *so, struct inpcbtable *table)
 {
struct inpcb *inp;
-   int s;
struct inpcbhead *head;
 
-   splsoftassert(IPL_SOFTNET);
+   NET_ASSERT_LOCKED();
 
if (inpcb_pool_initialized == 0) {
pool_init(_pool, sizeof(struct inpcb), 0,
@@ -269,7 +268,6 @@ in_pcballoc(struct socket *so, struct in
inp->inp_seclevel[SL_ESP_NETWORK] = IPSEC_ESP_NETWORK_LEVEL_DEFAULT;
inp->inp_seclevel[SL_IPCOMP] = IPSEC_IPCOMP_LEVEL_DEFAULT;
inp->inp_rtableid = curproc->p_p->ps_rtableid;
-   s = splnet();
if (table->inpt_hash != 0 &&
table->inpt_count++ > INPCBHASH_LOADFACTOR(table->inpt_hash))
(void)in_pcbresize(table, (table->inpt_hash + 1) * 2);
@@ -279,7 +277,6 @@ in_pcballoc(struct socket *so, struct in
head = INPCBHASH(table, >inp_faddr, inp->inp_fport,
>inp_laddr, inp->inp_lport, rtable_l2(inp->inp_rtableid));
LIST_INSERT_HEAD(head, inp, inp_hash);
-   splx(s);
so->so_pcb = inp;
inp->inp_hops = -1;
 
@@ -577,9 +574,8 @@ void
 in_pcbdetach(struct inpcb *inp)
 {
struct socket *so = inp->inp_socket;
-   int s;
 
-   splsoftassert(IPL_SOFTNET);
+   NET_ASSERT_LOCKED();
 
so->so_pcb = 0;
sofree(so);
@@ -602,12 +598,10 @@ in_pcbdetach(struct inpcb *inp)
pf_inp_unlink(inp);
}
 #endif
-   s = splnet();
LIST_REMOVE(inp, inp_lhash);
LIST_REMOVE(inp, inp_hash);
TAILQ_REMOVE(>inp_table->inpt_queue, inp, inp_queue);
inp->inp_table->inpt_count--;
-   splx(s);
pool_put(_pool, inp);
 }
 
@@ -661,7 +655,7 @@ in_pcbnotifyall(struct inpcbtable *table
struct inpcb *inp, *ninp;
struct in_addr faddr;
 
-   splsoftassert(IPL_SOFTNET);
+   NET_ASSERT_LOCKED();
 
 #ifdef INET6
/*
@@ -956,10 +950,10 @@ void
 in_pcbrehash(struct inpcb *inp)
 {
struct inpcbtable *table = inp->inp_table;
-   int s;
struct inpcbhead *head;
 
-   s = splnet();
+   NET_ASSERT_LOCKED();
+
LIST_REMOVE(inp, inp_lhash);
head = INPCBLHASH(table, inp->inp_lport, inp->inp_rtableid);
LIST_INSERT_HEAD(head, inp, inp_lhash);
@@ -975,7 +969,6 @@ in_pcbrehash(struct inpcb *inp)
>inp_laddr, inp->inp_lport,
rtable_l2(inp->inp_rtableid));
LIST_INSERT_HEAD(head, inp, inp_hash);
-   splx(s);
 }
 
 int
Index: netinet6/in6_pcb.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
retrieving revision 1.96
diff -u -p -r1.96 in6_pcb.c
--- netinet6/in6_pcb.c  4 Aug 2016 20:46:24 -   1.96
+++ netinet6/in6_pcb.c  6 Mar 2017 19:07:51 -
@@ -334,6 +334,8 @@ in6_pcbnotify(struct inpcbtable *head, s
int errno, nmatch = 0;
u_int32_t flowinfo;
 
+   NET_ASSERT_LOCKED();
+
if ((unsigned)cmd >= PRC_NCMDS)
return (0);
 



Re: take PRU_ATTACH out of usrreq

2017-03-06 Thread Alexander Bluhm
On Sun, Mar 05, 2017 at 10:54:39AM +0100, Claudio Jeker wrote:
> Here is an updated version of the PRU_ATTACH diff. I looked at the pfkey
> code and decided to only do the minimum now. I will go in afterwards and
> kill the dynamic fiddling in there. There is no reason for all this
> complication since we only support one and only one version of pfkey.

This diff is not minimal.  By changing socket->so_proto->pr_protocol
to proto you broke socket(AF_KEY,0x3,0).  ipsecctl(9)
uses socket(AF_KEY,0x3,0x2) so this is not noticed.

Such a diff may not change user land semantics.

> I would like to commit this version and then do the pfkey cleanup on top
> of this.

Yes please.  I took me a while to understand that something broke.

> Any OKs?

Do not do unnecessary changes in pfkey_attach() to keep review simple.
The route_usrreq() chunk does not apply anymore after mpi@'s commit.

Everything else is OK bluhm@

> -- 
> :wq Claudio
> 
> Index: kern/uipc_proto.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_proto.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 uipc_proto.c
> --- kern/uipc_proto.c 2 Mar 2017 08:58:24 -   1.13
> +++ kern/uipc_proto.c 5 Mar 2017 09:23:04 -
> @@ -55,6 +55,7 @@ struct protosw unixsw[] = {
>.pr_protocol   = PF_LOCAL,
>.pr_flags  = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
>.pr_usrreq = uipc_usrreq,
> +  .pr_attach = uipc_attach,
>  },
>  {
>.pr_type   = SOCK_SEQPACKET,
> @@ -62,6 +63,7 @@ struct protosw unixsw[] = {
>.pr_protocol   = PF_LOCAL,
>.pr_flags  = PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
>.pr_usrreq = uipc_usrreq,
> +  .pr_attach = uipc_attach,
>  },
>  {
>.pr_type   = SOCK_DGRAM,
> @@ -69,6 +71,7 @@ struct protosw unixsw[] = {
>.pr_protocol   = PF_LOCAL,
>.pr_flags  = PR_ATOMIC|PR_ADDR|PR_RIGHTS,
>.pr_usrreq = uipc_usrreq,
> +  .pr_attach = uipc_attach,
>  }
>  };
>  
> Index: kern/uipc_socket.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.178
> diff -u -p -r1.178 uipc_socket.c
> --- kern/uipc_socket.c3 Mar 2017 09:41:20 -   1.178
> +++ kern/uipc_socket.c5 Mar 2017 09:39:34 -
> @@ -119,7 +119,7 @@ socreate(int dom, struct socket **aso, i
>   prp = pffindproto(dom, proto, type);
>   else
>   prp = pffindtype(dom, type);
> - if (prp == NULL || prp->pr_usrreq == 0)
> + if (prp == NULL || prp->pr_attach == NULL)
>   return (EPROTONOSUPPORT);
>   if (prp->pr_type != type)
>   return (EPROTOTYPE);
> @@ -135,9 +135,9 @@ socreate(int dom, struct socket **aso, i
>   so->so_egid = p->p_ucred->cr_gid;
>   so->so_cpid = p->p_p->ps_pid;
>   so->so_proto = prp;
> +
>   s = solock(so);
> - error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
> - (struct mbuf *)(long)proto, NULL, p);
> + error = (*prp->pr_attach)(so, proto);
>   if (error) {
>   so->so_state |= SS_NOFDREF;
>   sofree(so);
> Index: kern/uipc_socket2.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.72
> diff -u -p -r1.72 uipc_socket2.c
> --- kern/uipc_socket2.c   14 Feb 2017 09:46:21 -  1.72
> +++ kern/uipc_socket2.c   25 Feb 2017 18:51:02 -
> @@ -187,8 +187,7 @@ sonewconn(struct socket *head, int conns
>   so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
>  
>   soqinsque(head, so, soqueue);
> - if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL, NULL,
> - curproc)) {
> + if ((*so->so_proto->pr_attach)(so, 0)) {
>   (void) soqremque(so, soqueue);
>   pool_put(_pool, so);
>   return (NULL);
> Index: kern/uipc_usrreq.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.116
> diff -u -p -r1.116 uipc_usrreq.c
> --- kern/uipc_usrreq.c14 Feb 2017 09:46:21 -  1.116
> +++ kern/uipc_usrreq.c26 Feb 2017 08:03:29 -
> @@ -117,7 +117,7 @@ uipc_usrreq(struct socket *so, int req, 
>   error = EOPNOTSUPP;
>   goto release;
>   }
> - if (unp == NULL && req != PRU_ATTACH) {
> + if (unp == NULL) {
>   error = EINVAL;
>   goto release;
>   }
> @@ -126,14 +126,6 @@ uipc_usrreq(struct socket *so, int req, 
>  
>   switch (req) {
>  
> - case PRU_ATTACH:
> - if (unp) {
> - error = EISCONN;
> - break;
> - }
> - error = unp_attach(so);
> - break;
> -
>   case PRU_DETACH:
>   unp_detach(unp);
>   break;
> @@ -351,11 +343,13 @@ u_long  unpdg_recvspace = 4*1024;
>  int  unp_rights; /* file 

Preserve original pf flow classification

2017-03-06 Thread Mike Belopuhov
There's no need to overwrite original flow ID assigned on
input so that it remains stable throughout the life of a
packet.  The rationale is that output processing may split,
encapsulate or obfuscate a single stream which makes the
changed flow ID useless for purposes of flow control, for
instance fair sharing of bandwidth, etc.

Is the diff below OK?

diff --git sys/net/pf.c sys/net/pf.c
index f64fdc4c80d..f3a5717be4a 100644
--- sys/net/pf.c
+++ sys/net/pf.c
@@ -6779,11 +6779,11 @@ done:
pd.m->m_pkthdr.pf.inp->inp_pf_sk =
pf_state_key_ref(s->key[PF_SK_STACK]);
s->key[PF_SK_STACK]->inp = pd.m->m_pkthdr.pf.inp;
}
 
-   if (s) {
+   if (s && (pd.m->m_pkthdr.ph_flowid & M_FLOWID_VALID) == 0) {
pd.m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
(M_FLOWID_MASK & bemtoh64(>id));
}
 
/*



Re: PF_ROUTE vs NET_LOCK(), take 2

2017-03-06 Thread Alexander Bluhm
On Mon, Mar 06, 2017 at 01:33:19PM +0100, Martin Pieuchot wrote:
> After some refactoring, here's the updated diff to take the routing
> sockets out of the NET_LOCK().
> 
> ok?

OK bluhm@

> 
> Index: net/raw_usrreq.c
> ===
> RCS file: /cvs/src/sys/net/raw_usrreq.c,v
> retrieving revision 1.29
> diff -u -p -r1.29 raw_usrreq.c
> --- net/raw_usrreq.c  3 Mar 2017 15:48:02 -   1.29
> +++ net/raw_usrreq.c  6 Mar 2017 12:23:57 -
> @@ -63,7 +63,7 @@ raw_usrreq(struct socket *so, int req, s
>   int error = 0;
>   int len;
>  
> - NET_ASSERT_LOCKED();
> + soassertlocked(so);
>  
>   if (req == PRU_CONTROL)
>   return (EOPNOTSUPP);
> Index: net/route.h
> ===
> RCS file: /cvs/src/sys/net/route.h,v
> retrieving revision 1.159
> diff -u -p -r1.159 route.h
> --- net/route.h   6 Mar 2017 08:56:39 -   1.159
> +++ net/route.h   6 Mar 2017 12:26:14 -
> @@ -417,10 +417,6 @@ struct sockaddr_in6;
>  struct bfd_config;
>  
>  void  route_init(void);
> -int   route_output(struct mbuf *, struct socket *, struct sockaddr *,
> - struct mbuf *);
> -int   route_usrreq(struct socket *, int, struct mbuf *,
> -struct mbuf *, struct mbuf *, struct proc *);
>  void  rtm_ifchg(struct ifnet *);
>  void  rtm_ifannounce(struct ifnet *, int);
>  void  rtm_bfd(struct bfd_config *);
> Index: net/rtsock.c
> ===
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.230
> diff -u -p -r1.230 rtsock.c
> --- net/rtsock.c  6 Mar 2017 10:19:17 -   1.230
> +++ net/rtsock.c  6 Mar 2017 12:28:11 -
> @@ -98,7 +98,11 @@ struct walkarg {
>   caddr_t w_where, w_tmem;
>  };
>  
> +int  route_output(struct mbuf *, struct socket *, struct sockaddr *,
> + struct mbuf *);
>  int  route_ctloutput(int, struct socket *, int, int, struct mbuf *);
> +int  route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
> + struct mbuf *, struct proc *);
>  void route_input(struct mbuf *m0, sa_family_t);
>  int  route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
>  int  route_cleargateway(struct rtentry *, void *, unsigned int);
> @@ -159,8 +163,6 @@ route_usrreq(struct socket *so, int req,
>   int  af;
>   int  error = 0;
>  
> - NET_ASSERT_LOCKED();
> -
>   rp = sotorawcb(so);
>  
>   switch (req) {
> @@ -345,6 +347,8 @@ route_input(struct mbuf *m0, sa_family_t
>   struct socket *last = NULL;
>   struct sockaddr *sosrc, *sodst;
>  
> + KERNEL_ASSERT_LOCKED();
> +
>   sosrc = _src;
>   sodst = _dst;
>  
> @@ -736,7 +740,9 @@ rtm_output(struct rt_msghdr *rtm, struct
>   struct sockaddr_mpls*psa_mpls;
>  #endif
>   int  plen, newgate = 0, error = 0;
> + int  s;
>  
> + NET_LOCK(s);
>   switch (rtm->rtm_type) {
>   case RTM_ADD:
>   if (info->rti_info[RTAX_GATEWAY] == NULL) {
> @@ -981,6 +987,8 @@ change:
>   error = ESRCH;
>   break;
>   }
> + NET_UNLOCK(s);
> +
>   *prt = rt;
>   return (error);
>  }
> Index: kern/uipc_socket.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.178
> diff -u -p -r1.178 uipc_socket.c
> --- kern/uipc_socket.c3 Mar 2017 09:41:20 -   1.178
> +++ kern/uipc_socket.c6 Mar 2017 12:23:57 -
> @@ -1038,11 +1038,12 @@ sorflush(struct socket *so)
>  {
>   struct sockbuf *sb = >so_rcv;
>   struct protosw *pr = so->so_proto;
> + sa_family_t af = pr->pr_domain->dom_family;
>   struct sockbuf asb;
>  
>   sb->sb_flags |= SB_NOINTR;
>   sblock(sb, M_WAITOK,
> - (pr->pr_domain->dom_family != PF_LOCAL) ?  : NULL);
> + (af != PF_LOCAL && af != PF_ROUTE) ?  : NULL);
>   socantrcvmore(so);
>   sbunlock(sb);
>   asb = *sb;
> Index: kern/uipc_socket2.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.72
> diff -u -p -r1.72 uipc_socket2.c
> --- kern/uipc_socket2.c   14 Feb 2017 09:46:21 -  1.72
> +++ kern/uipc_socket2.c   6 Mar 2017 12:23:57 -
> @@ -273,7 +273,8 @@ solock(struct socket *so)
>  {
>   int s;
>  
> - if (so->so_proto->pr_domain->dom_family != PF_LOCAL)
> + if ((so->so_proto->pr_domain->dom_family != PF_LOCAL) &&
> + (so->so_proto->pr_domain->dom_family != PF_ROUTE))
>   NET_LOCK(s);
>   else
>   s = -42;
> @@ -291,16 +292,18 @@ sounlock(int s)
>  void
>  soassertlocked(struct socket *so)
>  {
> - if (so->so_proto->pr_domain->dom_family != PF_LOCAL)
> + if 

Re: route_output M_WAITOK

2017-03-06 Thread Alexander Bluhm
On Sun, Mar 05, 2017 at 12:22:57PM +0100, Claudio Jeker wrote:
> On Fri, Mar 03, 2017 at 08:16:43AM +0100, Martin Pieuchot wrote:
> > On 03/03/17(Fri) 01:40, Alexander Bluhm wrote:
> > > On Thu, Mar 02, 2017 at 10:55:41AM +0100, Martin Pieuchot wrote:
> > > > Actually the malloc(9) could be converted to M_WAIT like the one in
> > > > rt_report() that claudio@ commented like that: 
> > > 
> > > So let's do that.  This avoids that we loose routing messages due
> > > to low memory.
> > > 
> > > Note that we cannot wait in m_copyback() as rp->rcb_proto.sp_family
> > > is temporarily set to 0 while calling route_input().
> > 
> > It'd be nice to change route_input() to get rid of this hack.
> > 
> 
> How about something like this.

OK bluhm@

> 
> -- 
> :wq Claudio
> 
> 
> Index: net/rtsock.c
> ===
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.228
> diff -u -p -r1.228 rtsock.c
> --- net/rtsock.c  3 Mar 2017 15:48:02 -   1.228
> +++ net/rtsock.c  5 Mar 2017 10:39:11 -
> @@ -99,7 +99,7 @@ struct walkarg {
>  };
>  
>  int  route_ctloutput(int, struct socket *, int, int, struct mbuf *);
> -void route_input(struct mbuf *m0, sa_family_t);
> +void route_input(struct mbuf *m0, struct socket *, sa_family_t);
>  int  route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
>  int  route_cleargateway(struct rtentry *, void *, unsigned int);
>  
> @@ -332,7 +332,7 @@ rt_senddesync(void *data)
>  }
>  
>  void
> -route_input(struct mbuf *m0, sa_family_t sa_family)
> +route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family)
>  {
>   struct rawcb *rp;
>   struct routecb *rop;
> @@ -356,6 +356,10 @@ route_input(struct mbuf *m0, sa_family_t
>   continue;
>   if (rp->rcb_proto.sp_family != PF_ROUTE)
>   continue;
> + /* Check to see if we don't want our own messages. */
> + if (so == rp->rcb_socket && !(so->so_options & SO_USELOOPBACK))
> + continue;
> +
>   /*
>* If route socket is bound to an address family only send
>* messages that match the address family. Address family
> @@ -537,7 +541,6 @@ route_output(struct mbuf *m, struct sock
>   int  plen, len, seq, newgate = 0, error = 0;
>   struct ifnet*ifp = NULL;
>   struct ifaddr   *ifa = NULL;
> - struct rawcb*rp = NULL;
>  #ifdef MPLS
>   struct sockaddr_mpls*psa_mpls;
>  #endif
> @@ -953,9 +956,7 @@ flush:
>   rtm->rtm_flags |= RTF_DONE;
>   }
>  
> - /*
> -  * Check to see if we don't want our own messages.
> -  */
> + /* Check to see if we don't want our own messages. */
>   if (!(so->so_options & SO_USELOOPBACK)) {
>   if (route_cb.any_count <= 1) {
>   /* no other listener and no loopback of messages */
> @@ -964,9 +965,6 @@ fail:
>   m_freem(m);
>   return (error);
>   }
> - /* There is another listener, so construct message */
> - rp = sotorawcb(so);
> - rp->rcb_proto.sp_family = 0; /* Avoid us */
>   }
>   if (rtm) {
>   if (m_copyback(m, 0, rtm->rtm_msglen, rtm, M_NOWAIT)) {
> @@ -977,10 +975,8 @@ fail:
>   free(rtm, M_RTABLE, 0);
>   }
>   if (m)
> - route_input(m, info.rti_info[RTAX_DST] ?
> + route_input(m, so, info.rti_info[RTAX_DST] ?
>   info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
> - if (rp)
> - rp->rcb_proto.sp_family = PF_ROUTE; /* Readd us */
>  
>   return (error);
>  }
> @@ -1253,7 +1249,7 @@ rt_missmsg(int type, struct rt_addrinfo 
>   rtm->rtm_tableid = tableid;
>   rtm->rtm_addrs = rtinfo->rti_addrs;
>   rtm->rtm_index = ifidx;
> - route_input(m, sa ? sa->sa_family : AF_UNSPEC);
> + route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
>  }
>  
>  /*
> @@ -1278,7 +1274,7 @@ rt_ifmsg(struct ifnet *ifp)
>   ifm->ifm_xflags = ifp->if_xflags;
>   if_getdata(ifp, >ifm_data);
>   ifm->ifm_addrs = 0;
> - route_input(m, AF_UNSPEC);
> + route_input(m, NULL, AF_UNSPEC);
>  }
>  
>  /*
> @@ -1314,7 +1310,8 @@ rt_sendaddrmsg(struct rtentry *rt, int c
>   ifam->ifam_addrs = info.rti_addrs;
>   ifam->ifam_tableid = ifp->if_rdomain;
>  
> - route_input(m, ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
> + route_input(m, NULL,
> + ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
>  }
>  
>  /*
> @@ -1336,7 +1333,7 @@ rt_ifannouncemsg(struct ifnet *ifp, int 
>   ifan->ifan_index = ifp->if_index;
>   strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
>   ifan->ifan_what = what;
> - route_input(m, AF_UNSPEC);
> + route_input(m, NULL, AF_UNSPEC);
>  }
>  
>  #ifdef 

Re: integer overflow in PF may lead to dropped connections

2017-03-06 Thread Mike Belopuhov
On Mon, Mar 06, 2017 at 14:46 +0100, Claudio Jeker wrote:
> On Mon, Mar 06, 2017 at 01:04:28PM +0100, Mike Belopuhov wrote:
> > On Fri, Mar 03, 2017 at 16:58 +0100, mathieu.bl...@cea.fr wrote:
> > > Hi,
> > > 
> > > here is a patch which fixes an integer overflow in the computation of
> > > adaptive timeouts. The effect of this integer overflow is that
> > > depending on timeout values, legitimate established connection states
> > > can be evicted immediately (although other states with shorter
> > > timeouts would be available but that's irrelevant).
> > > 
> > > Examples of such values would be:
> > > 
> > > timeout tcp.established 86400s (default)
> > > limit states 20
> > > 
> > > leads to a computed timeout of exactly 0 when the number of states is
> > > 140579.
> > > 
> > > While this condition is true, if the flush routine stumbles upon an
> > > established connection, it is immediately dropped. This is not
> > > guaranteed to happen because the flush routine checks only
> > > (100/interval)% of the states each second.
> > > 
> > > Here are values for a lower number of states in order to help
> > > reproduce the problem:
> > > 
> > > timeout tcp.established 1048576s
> > > limit states 6830
> > > 
> > > the 4100th state may lead to the eviction of an established connection.
> > > 
> > > We tried to provide the least intrusive patch and it works for us;
> > > feel free to use it.
> > > 
> > > Regards,
> > > Mathieu
> > > 
> > > 
> > > --- sys/net/pf.c  Mon Jul 18 15:17:44 2016
> > > +++ sys/net/pf.c  Fri Jan 20 13:10:04 2017
> > > @@ -1223,7 +1223,7 @@
> > >   if (states >= end)
> > >   return (0);
> > > 
> > > - timeout = timeout * (end - states) / (end - start);
> > > + timeout = (u_int64_t) timeout * (end - states) / (end - start);
> > >   }
> > > 
> > >   return (state->expire + timeout);
> > > EOF
> > > 
> > 
> > I think your diff looks alright.  I've double checked that it fixes the
> > overflow you're describing.
> > 
> 
> Agreed. I was toying a bit more around and came up with this version. Not
> sure if it is better. The idea is that (end - states) is very large when
> walking into adaptive timeouts and shrinking. I changed it to (states -
> start) which grows the bigger the table becomes.
> Also using more u_int32_t seems to be a good idea.
>

In several (start, end) ranges of number of states that I've tested,
there's virtually no difference between these two versions so I'd
rather not complicate the code unless there's no compelling reason
to do so.  Do you have a test in mind where this avoids some corner
case?

Either way, your or the submitted version are OK mikeb.

> -- 
> :wq Claudio
> 
> Index: net/pf.c
> ===
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.1015
> diff -u -p -r1.1015 pf.c
> --- net/pf.c  9 Feb 2017 15:19:32 -   1.1015
> +++ net/pf.c  6 Mar 2017 13:40:23 -
> @@ -1199,7 +1199,7 @@ pf_purge_thread(void *v)
>  int32_t
>  pf_state_expires(const struct pf_state *state)
>  {
> - int32_t timeout;
> + u_int32_t   timeout;
>   u_int32_t   start;
>   u_int32_t   end;
>   u_int32_t   states;
> @@ -1225,10 +1225,16 @@ pf_state_expires(const struct pf_state *
>   states = pf_status.states;
>   }
>   if (end && states > start && start < end) {
> + u_int64_t   t;
> +
>   if (states >= end)
>   return (0);
>  
> - timeout = timeout * (end - states) / (end - start);
> + t = (u_int64_t)timeout * (states - start) / (end - start);
> + if (t >= timeout)
> + timeout = 0;
> + else
> + timeout -= t;
>   }
>  
>   return (state->expire + timeout);



PF_ROUTE vs NET_LOCK(), take 2

2017-03-06 Thread Martin Pieuchot
After some refactoring, here's the updated diff to take the routing
sockets out of the NET_LOCK().

ok?

Index: net/raw_usrreq.c
===
RCS file: /cvs/src/sys/net/raw_usrreq.c,v
retrieving revision 1.29
diff -u -p -r1.29 raw_usrreq.c
--- net/raw_usrreq.c3 Mar 2017 15:48:02 -   1.29
+++ net/raw_usrreq.c6 Mar 2017 12:23:57 -
@@ -63,7 +63,7 @@ raw_usrreq(struct socket *so, int req, s
int error = 0;
int len;
 
-   NET_ASSERT_LOCKED();
+   soassertlocked(so);
 
if (req == PRU_CONTROL)
return (EOPNOTSUPP);
Index: net/route.h
===
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.159
diff -u -p -r1.159 route.h
--- net/route.h 6 Mar 2017 08:56:39 -   1.159
+++ net/route.h 6 Mar 2017 12:26:14 -
@@ -417,10 +417,6 @@ struct sockaddr_in6;
 struct bfd_config;
 
 voidroute_init(void);
-int route_output(struct mbuf *, struct socket *, struct sockaddr *,
-   struct mbuf *);
-int route_usrreq(struct socket *, int, struct mbuf *,
-  struct mbuf *, struct mbuf *, struct proc *);
 voidrtm_ifchg(struct ifnet *);
 voidrtm_ifannounce(struct ifnet *, int);
 voidrtm_bfd(struct bfd_config *);
Index: net/rtsock.c
===
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.230
diff -u -p -r1.230 rtsock.c
--- net/rtsock.c6 Mar 2017 10:19:17 -   1.230
+++ net/rtsock.c6 Mar 2017 12:28:11 -
@@ -98,7 +98,11 @@ struct walkarg {
caddr_t w_where, w_tmem;
 };
 
+introute_output(struct mbuf *, struct socket *, struct sockaddr *,
+   struct mbuf *);
 introute_ctloutput(int, struct socket *, int, int, struct mbuf *);
+introute_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
+   struct mbuf *, struct proc *);
 void   route_input(struct mbuf *m0, sa_family_t);
 introute_arp_conflict(struct rtentry *, struct rt_addrinfo *);
 introute_cleargateway(struct rtentry *, void *, unsigned int);
@@ -159,8 +163,6 @@ route_usrreq(struct socket *so, int req,
int  af;
int  error = 0;
 
-   NET_ASSERT_LOCKED();
-
rp = sotorawcb(so);
 
switch (req) {
@@ -345,6 +347,8 @@ route_input(struct mbuf *m0, sa_family_t
struct socket *last = NULL;
struct sockaddr *sosrc, *sodst;
 
+   KERNEL_ASSERT_LOCKED();
+
sosrc = _src;
sodst = _dst;
 
@@ -736,7 +740,9 @@ rtm_output(struct rt_msghdr *rtm, struct
struct sockaddr_mpls*psa_mpls;
 #endif
int  plen, newgate = 0, error = 0;
+   int  s;
 
+   NET_LOCK(s);
switch (rtm->rtm_type) {
case RTM_ADD:
if (info->rti_info[RTAX_GATEWAY] == NULL) {
@@ -981,6 +987,8 @@ change:
error = ESRCH;
break;
}
+   NET_UNLOCK(s);
+
*prt = rt;
return (error);
 }
Index: kern/uipc_socket.c
===
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.178
diff -u -p -r1.178 uipc_socket.c
--- kern/uipc_socket.c  3 Mar 2017 09:41:20 -   1.178
+++ kern/uipc_socket.c  6 Mar 2017 12:23:57 -
@@ -1038,11 +1038,12 @@ sorflush(struct socket *so)
 {
struct sockbuf *sb = >so_rcv;
struct protosw *pr = so->so_proto;
+   sa_family_t af = pr->pr_domain->dom_family;
struct sockbuf asb;
 
sb->sb_flags |= SB_NOINTR;
sblock(sb, M_WAITOK,
-   (pr->pr_domain->dom_family != PF_LOCAL) ?  : NULL);
+   (af != PF_LOCAL && af != PF_ROUTE) ?  : NULL);
socantrcvmore(so);
sbunlock(sb);
asb = *sb;
Index: kern/uipc_socket2.c
===
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.72
diff -u -p -r1.72 uipc_socket2.c
--- kern/uipc_socket2.c 14 Feb 2017 09:46:21 -  1.72
+++ kern/uipc_socket2.c 6 Mar 2017 12:23:57 -
@@ -273,7 +273,8 @@ solock(struct socket *so)
 {
int s;
 
-   if (so->so_proto->pr_domain->dom_family != PF_LOCAL)
+   if ((so->so_proto->pr_domain->dom_family != PF_LOCAL) &&
+   (so->so_proto->pr_domain->dom_family != PF_ROUTE))
NET_LOCK(s);
else
s = -42;
@@ -291,16 +292,18 @@ sounlock(int s)
 void
 soassertlocked(struct socket *so)
 {
-   if (so->so_proto->pr_domain->dom_family != PF_LOCAL)
+   if ((so->so_proto->pr_domain->dom_family != PF_LOCAL) &&
+   (so->so_proto->pr_domain->dom_family != PF_ROUTE))
NET_ASSERT_LOCKED();
 }
 
 int
 sosleep(struct socket *so, void *ident, int prio, const char *wmesg, int timo)
 {
-   if 

Re: integer overflow in PF may lead to dropped connections

2017-03-06 Thread Mike Belopuhov
On Fri, Mar 03, 2017 at 16:58 +0100, mathieu.bl...@cea.fr wrote:
> Hi,
> 
> here is a patch which fixes an integer overflow in the computation of
> adaptive timeouts. The effect of this integer overflow is that
> depending on timeout values, legitimate established connection states
> can be evicted immediately (although other states with shorter
> timeouts would be available but that's irrelevant).
> 
> Examples of such values would be:
> 
> timeout tcp.established 86400s (default)
> limit states 20
> 
> leads to a computed timeout of exactly 0 when the number of states is
> 140579.
> 
> While this condition is true, if the flush routine stumbles upon an
> established connection, it is immediately dropped. This is not
> guaranteed to happen because the flush routine checks only
> (100/interval)% of the states each second.
> 
> Here are values for a lower number of states in order to help
> reproduce the problem:
> 
> timeout tcp.established 1048576s
> limit states 6830
> 
> the 4100th state may lead to the eviction of an established connection.
> 
> We tried to provide the least intrusive patch and it works for us;
> feel free to use it.
> 
> Regards,
> Mathieu
> 
> 
> --- sys/net/pf.c  Mon Jul 18 15:17:44 2016
> +++ sys/net/pf.c  Fri Jan 20 13:10:04 2017
> @@ -1223,7 +1223,7 @@
>   if (states >= end)
>   return (0);
> 
> - timeout = timeout * (end - states) / (end - start);
> + timeout = (u_int64_t) timeout * (end - states) / (end - start);
>   }
> 
>   return (state->expire + timeout);
> EOF
> 

I think your diff looks alright.  I've double checked that it fixes the
overflow you're describing.



{tc,ud}btable & NET_LOCK()

2017-03-06 Thread Martin Pieuchot
In sysctl_file() splnet() is no longer what we want.  Arguably the
KERNEL_LOCK() should be enough, but since pf(4) is messing with these
tables, and nobody turned this part of pf(4) MP, let's require the
NET_LOCK().

ok?

Index: kern/kern_sysctl.c
===
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.321
diff -u -p -r1.321 kern_sysctl.c
--- kern/kern_sysctl.c  21 Jan 2017 05:42:03 -  1.321
+++ kern/kern_sysctl.c  6 Mar 2017 11:08:41 -
@@ -1284,7 +1284,7 @@ sysctl_file(int *name, u_int namelen, ch
struct inpcb *inp;
int s;
 
-   s = splnet();
+   NET_LOCK(s);
TAILQ_FOREACH(inp, _queue, inp_queue)
FILLSO(inp->inp_socket);
TAILQ_FOREACH(inp, _queue, inp_queue)
@@ -1296,7 +1296,7 @@ sysctl_file(int *name, u_int namelen, ch
inp_queue)
FILLSO(inp->inp_socket);
 #endif
-   splx(s);
+   NET_UNLOCK(s);
}
fp = LIST_FIRST();
/* don't FREF when f_count == 0 to avoid race in fdrop() */
Index: net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1015
diff -u -p -r1.1015 pf.c
--- net/pf.c9 Feb 2017 15:19:32 -   1.1015
+++ net/pf.c6 Mar 2017 11:08:18 -
@@ -3143,11 +3143,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
case IPPROTO_TCP:
sport = pd->hdr.tcp.th_sport;
dport = pd->hdr.tcp.th_dport;
+   NET_ASSERT_LOCKED();
tb = 
break;
case IPPROTO_UDP:
sport = pd->hdr.udp.uh_sport;
dport = pd->hdr.udp.uh_dport;
+   NET_ASSERT_LOCKED();
tb = 
break;
default:



Re: 11n support for athn(4)

2017-03-06 Thread Stefan Sperling
On Mon, Mar 06, 2017 at 07:36:21AM +0200, Timo Myyrä wrote:
> Did some tcpbench testing and got following results:
> Each test run with: tcpbench -s || tcpbench -t 15  commands.
> Host AP: apu 2b4 with athn, client = thinkpad t430s with iwn (OpenBSD)
> 
> channel 9 running old snapshot etc:
> 11n client -> server ~4, server -> ~0,
> 11g client -> server ~16, server -> client ~0-6mbs
> ---
> updated to new snapshot: OpenBSD 6.0-current (GENERIC.MP) #206: Sat Mar  4 
> 09:22:00 MST 2017
> = added another antenna, moved the ap to better spot, switched old ap off, 
> changed channel to 11
> 11n client -> server: ~6mbps, server -> client ~0.2mbps
> 11g client -> server: ~7mbps, server -> client ~3mbps
> 
> ---
> switch to channel 3:
> 11n client -> server: ~7mbps, server -> client: ~0-5mbps
> 11g client -> server: 16mbps, server -> client: ~5mbps
> 
> ---
> applied dyn rts patch:
> 11n client -> server: 4-7mbps, server -> client 0.2-5.5mbps
> 11g client -> server: ~4mbps, server -> client: ~5.5mbps

What made 11n go down from 16 to 4?
11g is not affected by this patch so something else affected 11g.
Could it be other networks on overlapping channels?
 
To tell whether the patch has any effect in your case I would like to
know which HT protection setting your AP is using.

Find a snapshot dated a bit after 2017/03/04 10:51:20 MST, or make sure tcpdump
sources are -current and rebuild and install tcpdump. Associate to the AP,
and run: tcpdump -n -i iwn0 -y IEEE802_11_RADIO -s 4096 -v -l | grep beacon
and in htop=<...> look for 'htprot'. If it shows 'htprot none' then dynamic
RTS is used in 11n mode (i.e. my patch will switch RTS on/off as needed).
Otherwise, you have some pre-11n devices in your environment so RTS must
always be enabled and my patch makes no difference.

Note that the iwn(4) driver does not yet support MIMO in 11n mode.
Once it does, 11n should become faster than 11g. I have seen an iwm(4) client
which supports MIMO max out at 21 Mbps tcpbench towards my athn(4) AP, on a
5GHz channel with 'htprot none'.
Unfortunately, tcpbench in the other direction (athn -> iwm) peaks at 10 Mbps.
There is plenty of room for improvement.

> At least what pops up in the measurements is that 11g gives more stable
> behaviour. 11n speed seems to vary a lot in that 15s test compared to 11g.

This could be explained by differences in rate scaling algorithms.
In -current 11g uses AMRR, and 11n mode uses MiRa which jumps around more.
In 6.0 everything used AMRR so a comparing how a 6.0 iwn client performs
in 11n mode would be useful (you could e.g. install 6.0 to a USB stick
and boot from it for running a speed test).



integer overflow in PF may lead to dropped connections

2017-03-06 Thread mathieu.blanc
Hi,

here is a patch which fixes an integer overflow in the computation of
adaptive timeouts. The effect of this integer overflow is that
depending on timeout values, legitimate established connection states
can be evicted immediately (although other states with shorter
timeouts would be available but that's irrelevant).

Examples of such values would be:

timeout tcp.established 86400s (default)
limit states 20

leads to a computed timeout of exactly 0 when the number of states is
140579.

While this condition is true, if the flush routine stumbles upon an
established connection, it is immediately dropped. This is not
guaranteed to happen because the flush routine checks only
(100/interval)% of the states each second.

Here are values for a lower number of states in order to help
reproduce the problem:

timeout tcp.established 1048576s
limit states 6830

the 4100th state may lead to the eviction of an established connection.

We tried to provide the least intrusive patch and it works for us;
feel free to use it.

Regards,
Mathieu


--- sys/net/pf.cMon Jul 18 15:17:44 2016
+++ sys/net/pf.cFri Jan 20 13:10:04 2017
@@ -1223,7 +1223,7 @@
if (states >= end)
return (0);

-   timeout = timeout * (end - states) / (end - start);
+   timeout = (u_int64_t) timeout * (end - states) / (end - start);
}

return (state->expire + timeout);
EOF