sparc* ld.so: delete obsolete __plt_{start,end} handling

2016-08-08 Thread Philip Guenther

As noted by kettenis@, all sparc64 binaries from 6.0-release forward 
should have their .plt segments marked as RWX, and thus initially mapped 
as RW but updated to RX after relocation.  Given that, mprotecting 
[__plt_start, __plt_end) is no longer necessary there.

I've eyeballed the readelf output of the most recent sparc binaries and 
they follow this rule as well.

So, continuing my stealing of good ideas from kettenis@, let's delete the 
obsolete code for those.

ok?

Philip


Index: sparc/rtld_machine.c
===
RCS file: /cvs/src/libexec/ld.so/sparc/rtld_machine.c,v
retrieving revision 1.44
diff -u -p -r1.44 rtld_machine.c
--- sparc/rtld_machine.c21 Jun 2016 15:25:38 -  1.44
+++ sparc/rtld_machine.c9 Aug 2016 05:09:29 -
@@ -455,10 +455,6 @@ _dl_md_reloc_got(elf_object_t *object, i
/* mprotect the GOT */
_dl_protect_segment(object, 0, "__got_start", "__got_end", PROT_READ);
 
-   /* mprotect the PLT */
-   _dl_protect_segment(object, 0, "__plt_start", "__plt_end",
-   PROT_READ|PROT_EXEC);
-
return (fails);
 }
 
Index: sparc64/rtld_machine.c
===
RCS file: /cvs/src/libexec/ld.so/sparc64/rtld_machine.c,v
retrieving revision 1.58
diff -u -p -r1.58 rtld_machine.c
--- sparc64/rtld_machine.c  21 Jun 2016 15:25:38 -  1.58
+++ sparc64/rtld_machine.c  9 Aug 2016 05:09:29 -
@@ -847,10 +847,6 @@ _dl_md_reloc_got(elf_object_t *object, i
if (object->traced)
lazy = 1;
 
-   /* temporarily make the PLT writable */
-   _dl_protect_segment(object, 0, "__plt_start", "__plt_end",
-   PROT_READ|PROT_WRITE);
-
if (!lazy) {
fails = _dl_md_reloc_all_plt(object);
} else {
@@ -862,10 +858,6 @@ _dl_md_reloc_got(elf_object_t *object, i
 
/* mprotect the GOT */
_dl_protect_segment(object, 0, "__got_start", "__got_end", PROT_READ);
-
-   /* mprotect the PLT */
-   _dl_protect_segment(object, 0, "__plt_start", "__plt_end",
-   PROT_READ|PROT_EXEC);
 
return (fails);
 }



ral(4) support RT5390 and RT5392

2016-08-08 Thread James Hastings
Hi all,

The following patch adds RT5390/RT5392 support to ral(4).

Ported from FreeBSD r278551 and r36.

Running smoothly with RT3090 and various RT5390 cards.

Requires updated ral-rt2860 firmware

Index: ral.4
===
RCS file: /cvs/src/share/man/man4/ral.4,v
retrieving revision 1.109
diff -u -p -r1.109 ral.4
--- ral.4   16 Jun 2016 17:25:32 -  1.109
+++ ral.4   9 Aug 2016 01:16:11 -
@@ -27,7 +27,7 @@
 The
 .Nm
 driver supports PCI/PCIe/CardBus wireless adapters based on the Ralink RT2500,
-RT2501, RT2600, RT2700, RT2800 and RT3090 chipsets.
+RT2501, RT2600, RT2700, RT2800, RT3090 and RT3900E chipsets.
 .Pp
 The RT2500 chipset is the first generation of 802.11b/g adapters from Ralink.
 It consists of two integrated chips, an RT2560 MAC/BBP and an RT2525 radio
@@ -64,6 +64,13 @@ bandwidth.)
 The RT3090 chipset is the first generation of single-chip 802.11n adapters
 from Ralink.
 .Pp
+The RT3900E chipset is a single-chip 802.11n adapter from Ralink.
+The MAC/Baseband Processor can be an RT5390 or RT5392.
+The RT5390 chip operates in the 2GHz spectrum and supports one transmit path
+and one receiver path (1T1R).
+The RT5392 chip operates in the 2GHz spectrum and supports up to two transmit
+paths and two receiver paths (2T2R).
+.Pp
 These are the modes the
 .Nm
 driver can operate in:
Index: pcidevs
===
RCS file: /cvs/src/sys/dev/pci/pcidevs,v
retrieving revision 1.1802
diff -u -p -r1.1802 pcidevs
--- pcidevs 31 Jul 2016 07:36:16 -  1.1802
+++ pcidevs 9 Aug 2016 02:40:33 -
@@ -6436,6 +6436,10 @@ product RALINK RT35620x3562  RT3562
 product RALINK RT3592  0x3592  RT3592
 product RALINK RT3593  0x3593  RT3593
 product RALINK RT5390  0x5390  RT5390
+product RALINK RT5392  0x5392  RT5392
+product RALINK RT5390_1  0x539a  RT5390
+product RALINK RT5390_2  0x539b  RT5390
+product RALINK RT5390_3  0x539f  RT5390

 /* RDC products */
 product RDC R1010_IDE  0x1010  R1010 IDE
Index: pcidevs.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v
retrieving revision 1.1796
diff -u -p -r1.1796 pcidevs.h
--- pcidevs.h   31 Jul 2016 07:37:04 -  1.1796
+++ pcidevs.h   9 Aug 2016 03:13:34 -
@@ -6441,6 +6441,10 @@
 #definePCI_PRODUCT_RALINK_RT3592   0x3592  /* RT3592 */
 #definePCI_PRODUCT_RALINK_RT3593   0x3593  /* RT3593 */
 #definePCI_PRODUCT_RALINK_RT5390   0x5390  /* RT5390 */
+#definePCI_PRODUCT_RALINK_RT5392   0x5392  /* RT5392 */
+#definePCI_PRODUCT_RALINK_RT5390_1 0x539a  /* RT5390 */
+#definePCI_PRODUCT_RALINK_RT5390_2 0x539b  /* RT5390 */
+#definePCI_PRODUCT_RALINK_RT5390_3 0x539f  /* RT5390 */

 /* RDC products */
 #definePCI_PRODUCT_RDC_R1010_IDE   0x1010  /* R1010 IDE */
Index: pcidevs_data.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v
retrieving revision 1.1791
diff -u -p -r1.1791 pcidevs_data.h
--- pcidevs_data.h  31 Jul 2016 07:37:04 -  1.1791
+++ pcidevs_data.h  9 Aug 2016 03:14:17 -
@@ -22496,6 +22496,22 @@ static const struct pci_known_product pc
"RT5390",
},
{
+   PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT5392,
+   "RT5392",
+   },
+   {
+   PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT5390_1,
+   "RT5390",
+   },
+   {
+   PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT5390_2,
+   "RT5390",
+   },
+   {
+   PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT5390_3,
+   "RT5390",
+   },
+   {
PCI_VENDOR_RDC, PCI_PRODUCT_RDC_R1010_IDE,
"R1010 IDE",
},
Index: if_ral_pci.c
===
RCS file: /cvs/src/sys/dev/pci/if_ral_pci.c,v
retrieving revision 1.24
diff -u -p -r1.24 if_ral_pci.c
--- if_ral_pci.c24 Nov 2015 17:11:39 -  1.24
+++ if_ral_pci.c9 Aug 2016 02:45:18 -
@@ -135,7 +135,12 @@ const struct pci_matchid ral_pci_devices
{ PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT3092 },
{ PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT3562 },
{ PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT3592 },
-   { PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT3593 }
+   { PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT3593 },
+   { PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT5390 },
+   { PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT5392 },
+   { PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT5390_1 },
+   { PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT5390_2 },
+   { PCI_VENDOR_RALINK, PCI_PRODUCT_RALINK_RT5390_3 }
 };

 int
Index: rt286

Re: pmap7.c cleanup

2016-08-08 Thread Daniel Bolgheroni
On Mon, Aug 08, 2016 at 09:17:39PM +0200, Mark Kettenis wrote:
> This diff should not introduce any change in behaviour.

Another round tested with a kernel build and works for me on BeagleBone
Black.

-- 
db



Re: move ufs_vinit() to ffs

2016-08-08 Thread Ted Unangst
Martin Natano wrote:
> The ufs_vinit() function should really be called ffs_vinit(). The only
> place it is called from is ffs_vget(). And again, the FIFOOPS macro can
> be killed.
> 
> Ok?

ok



Re: ext2fs_vinit() cleanup

2016-08-08 Thread Ted Unangst
Martin Natano wrote:
> ext2fs has only one set of specops/fifoops, so no need to pass those to
> the function. This also allows to get rid of the EXT2FS_FIFOOPS define.
> 
> Ok?

ok



Re: Update: FreeType 2.6.5

2016-08-08 Thread Matthieu Herrb
On Thu, Aug 04, 2016 at 10:55:29AM +0200, David Coppa wrote:
> 
> Here's the update to the latest freetype.
> 
> $ check_sym
> /usr/X11R6/lib/libfreetype.so.25.0 --> obj/libfreetype.so.25.0
> Dynamic export changes:
> added:
>   af_armn_dflt_style_class
>   af_armn_nonbase_uniranges
>   af_armn_script_class
>   af_armn_uniranges
>   af_cher_dflt_style_class
>   af_cher_nonbase_uniranges
>   af_cher_script_class
>   af_cher_uniranges
>   af_ethi_dflt_style_class
>   af_ethi_nonbase_uniranges
>   af_ethi_script_class
>   af_ethi_uniranges
>   af_geok_dflt_style_class
>   af_geok_nonbase_uniranges
>   af_geok_script_class
>   af_geok_uniranges
>   af_geor_dflt_style_class
>   af_geor_nonbase_uniranges
>   af_geor_script_class
>   af_geor_uniranges
> 
> And thus I've bumped minor to 1.
> 
> Ciao!
> David
> 
> Diff is also attached, gzipped.

ok matthieu@.
-- 
Matthieu Herrb


signature.asc
Description: PGP signature


pmap7.c cleanup

2016-08-08 Thread Mark Kettenis
The page tables are cached now, and given the significant speedup, I
don't think we'll ever go back.  So let's ditch the code that tries to
check and patch up incorrect memory attributes.

Also realize that pmap_clean_page(pg, FALSE) doesn't do anything
anymore so remove those calls and drop the 2nd argument from
pmap_clean_page(pg, TRUE) calls.

Last but not least, get rid of pmap_pte_init_generic() here.  The only
useful thing it did was setting pmap_copy_page_func() and
pmap_zero_page_func().

This diff should not introduce any change in behaviour.

ok?


Index: pmap7.c
===
RCS file: /cvs/src/sys/arch/arm/arm/pmap7.c,v
retrieving revision 1.35
diff -u -p -r1.35 pmap7.c
--- pmap7.c 8 Aug 2016 14:47:52 -   1.35
+++ pmap7.c 8 Aug 2016 19:06:20 -
@@ -378,7 +378,6 @@ struct pv_entry {
 /*
  * Local prototypes
  */
-intpmap_set_pt_cache_mode(pd_entry_t *, vaddr_t);
 void   pmap_alloc_specials(vaddr_t *, int, vaddr_t *,
pt_entry_t **);
 static boolean_t   pmap_is_current(pmap_t);
@@ -395,10 +394,9 @@ void   pmap_free_l1(pmap_t);
 struct l2_bucket *pmap_get_l2_bucket(pmap_t, vaddr_t);
 struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vaddr_t);
 void   pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int);
-void   pmap_l2ptp_ctor(void *);
 
 void   pmap_clearbit(struct vm_page *, u_int);
-void   pmap_clean_page(struct vm_page *, int);
+void   pmap_clean_page(struct vm_page *);
 void   pmap_page_remove(struct vm_page *);
 
 void   pmap_init_l1(struct l1_ttable *, pd_entry_t *);
@@ -626,12 +624,10 @@ uint nl1;
 void
 pmap_alloc_l1(pmap_t pm, int domain)
 {
-   struct l2_bucket *l2b;
struct l1_ttable *l1;
struct pglist plist;
struct vm_page *m;
pd_entry_t *pl1pt;
-   pt_entry_t *ptep, pte;
vaddr_t va, eva;
int error;
 
@@ -664,22 +660,6 @@ printf("%s: %d %d\n", __func__, domain, 
paddr_t pa = VM_PAGE_TO_PHYS(m);
 
pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
-   /*
-* Make sure the L1 descriptor table is mapped
-* with the cache-mode set to write-through, or
-* correctly synced.
-*/
-   l2b = pmap_get_l2_bucket(pmap_kernel(), va);
-   ptep = &l2b->l2b_kva[l2pte_index(va)];
-   pte = *ptep;
-
-   if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
-   pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
-   *ptep = pte;
-   PTE_SYNC(ptep);
-   cpu_tlb_flushD_SE(va);
-   }
-
m = TAILQ_NEXT(m, pageq);
}
 
@@ -798,7 +778,7 @@ pmap_alloc_l2_bucket(pmap_t pm, vaddr_t 
}
return (NULL);
}
-   pmap_l2ptp_ctor(ptep);
+   PTE_SYNC_RANGE(ptep, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
pmap_extract(pmap_kernel(), (vaddr_t)ptep, &l2b->l2b_phys);
 
l2->l2_occupancy++;
@@ -894,40 +874,6 @@ pmap_free_l2_bucket(pmap_t pm, struct l2
 }
 
 /*
- * Cache constructors for L2 descriptor tables, metadata and pmap
- * structures.
- */
-void
-pmap_l2ptp_ctor(void *v)
-{
-   struct l2_bucket *l2b;
-   pt_entry_t *ptep, pte;
-   vaddr_t va = (vaddr_t)v & ~PGOFSET;
-
-   /*
-* The mappings for these page tables were initially made using
-* pmap_kenter_pa() by the pool subsystem. Therefore, the cache-
-* mode will not be right for page table mappings. To avoid
-* polluting the pmap_kenter_pa() code with a special case for
-* page tables, we simply fix up the cache-mode here if it's not
-* correct.
-*/
-   l2b = pmap_get_l2_bucket(pmap_kernel(), va);
-   KDASSERT(l2b != NULL);
-   ptep = &l2b->l2b_kva[l2pte_index(va)];
-   pte = *ptep;
-
-   if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
-   *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
-   PTE_SYNC(ptep);
-   cpu_tlb_flushD_SE(va);
-   cpu_cpwait();
-   }
-
-   PTE_SYNC_RANGE(v, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
-}
-
-/*
  * Modify pte bits for all ptes corresponding to the given physical address.
  * We use `maskbits' rather than `clearbits' because we're always passing
  * constants and the latter would require an extra inversion at run-time.
@@ -955,13 +901,6 @@ pmap_clearbit(struct vm_page *pg, u_int 
return;
 
/*
-* If we are changing a writable or modified page to
-* read-only (or worse), be sure to flush it first.
-*/
-   if (maskbits & (PVF_WRITE|PVF_MOD))
-   pmap_clean_page(pg, FALSE);
-
-   /*
  

Re: read(2) on directories

2016-08-08 Thread Theo de Raadt
>> From source inspection, Net and Free appear to allow read(2) of
>> dirs to succeed.  However, since Linux, Mac OS X and Solaris have
>> the EISDIR behavior I think it is probably safe from a portability
>> standpoint.

I want to explain why I chose the semantic of "read returns 0",
about 20 years ago I guess.

All of those systems already have that semantic on NFS directories --
directory fd reads return 0.

Therefore, it seemed reasonable to assume that all programs could
already handle that case properly.  Sun and CSRG had done the auditing
work on the base utilities during the 90's, so applying the same
rule to native filesystem seemed sound.

It was simply impossible to add a new error condition to system call
#4, without auditing everything.

Maybe some of these vendors continued their utility audit and switched
to EISDIR on native filesystems.  Well we haven't done that work
yet.



Re: read(2) on directories

2016-08-08 Thread Theo de Raadt
> > "Todd C. Miller"  writes:
> >
> >> From source inspection, Net and Free appear to allow read(2) of
> >> dirs to succeed.  However, since Linux, Mac OS X and Solaris have
> >> the EISDIR behavior I think it is probably safe from a portability
> >> standpoint.
> >>
> >> We're long past the days when opendir(3)/readdir(3) used read(2)...
> >>
> >> HP-UX and AIX still allow reads of directories but no one cares
> >> about them ;-)
> >
> > So I think that we agree that EISDIR is more useful, and seems safe from
> > a portability POV.   I've built base and x sets on i386, and ajacoutot
> > ran the ports bulk builds.  The two offenders in the ports tree were due
> > to an unrelated glitch in base libtool which has since been fixed.
> 
> I've hold this for a few days until the release cycle comes back to an
> almost normal state.  But, as Theo points out, we have to think about the
> drawbacks.  Some configurations that do work right now are very likely
> to break.
> 
>   $ cat /; echo $?
>   cat: /: Is a directory
>   1
> 
> is only a trivial example.
> 
> I still think that it is better to error out instead of, for example,
> silently ignoring invalid/mistyped CLI arguments and config parameters.
> But I don't want to mindlessly break people's setups behind their backs,
> so I'd like to hear more opinions and... *test reports*.

The change will affect a huge number of programs in significant ways.
I am waiting for people to actually try it, and I haven't been seeing
that.

I don't like the cat example above.  It does not actually expose the
full change in behaviour from this simple program.  Here is a better
example:

   % cat / /etc/shells; echo $?
   #   $OpenBSD: shells,v 1.8 2009/02/14 17:06:40 sobrado Exp $
   #
   # list of acceptable shells for chpass(1).
   # ftpd(8) will not allow users to connect who are not using
   # one of these shells, unless the user is listed in /etc/ftpchroot.
   /bin/sh
   /bin/csh
   /bin/ksh
   0
   %

Will become:

   % cat / /etc/shells; echo $?
   cat: /: Is a directory
   1
   %

The semantic will cause a subtle behavioural difference in hundreds
of programs.



Re: read(2) on directories

2016-08-08 Thread Jeremie Courreges-Anglas
j...@wxcvbn.org (Jeremie Courreges-Anglas) writes:

> "Todd C. Miller"  writes:
>
>> From source inspection, Net and Free appear to allow read(2) of
>> dirs to succeed.  However, since Linux, Mac OS X and Solaris have
>> the EISDIR behavior I think it is probably safe from a portability
>> standpoint.
>>
>> We're long past the days when opendir(3)/readdir(3) used read(2)...
>>
>> HP-UX and AIX still allow reads of directories but no one cares
>> about them ;-)
>
> So I think that we agree that EISDIR is more useful, and seems safe from
> a portability POV.   I've built base and x sets on i386, and ajacoutot
> ran the ports bulk builds.  The two offenders in the ports tree were due
> to an unrelated glitch in base libtool which has since been fixed.

I've held this for a few days until the release cycle comes back to an
almost normal state.  But, as Theo points out, we have to think about the
drawbacks.  Some configurations that do work right now are very likely
to break.

  $ cat /; echo $?
  cat: /: Is a directory
  1

is only a trivial example.

I still think that it is better to error out instead of, for example,
silently ignoring invalid/mistyped CLI arguments and config parameters.
But I don't want to mindlessly break people's setups behind their backs,
so I'd like to hear more opinions and... *test reports*.

Diff below for convenience.


Index: lib/libc/sys/read.2
===
RCS file: /cvs/src/lib/libc/sys/read.2,v
retrieving revision 1.35
diff -u -p -r1.35 read.2
--- lib/libc/sys/read.2 5 Feb 2015 02:33:09 -   1.35
+++ lib/libc/sys/read.2 9 Jul 2016 17:20:39 -
@@ -152,13 +152,15 @@ is not a valid file or socket descriptor
 Part of
 .Fa buf
 points outside the process's allocated address space.
-.It Bq Er EIO
-An I/O error occurred while reading from the file system.
 .It Bq Er EINTR
 A read from a slow device
 (i.e. one that might block for an arbitrary amount of time)
 was interrupted by the delivery of a signal
 before any data arrived.
+.It Bq Er EIO
+An I/O error occurred while reading from the file system.
+.It Bq Er EISDIR
+The underlying file is a directory.
 .El
 .Pp
 In addition,
Index: sys/kern/vfs_vnops.c
===
RCS file: /cvs/src/sys/kern/vfs_vnops.c,v
retrieving revision 1.85
diff -u -p -r1.85 vfs_vnops.c
--- sys/kern/vfs_vnops.c19 Jun 2016 11:54:33 -  1.85
+++ sys/kern/vfs_vnops.c9 Jul 2016 17:20:39 -
@@ -336,11 +336,13 @@ vn_read(struct file *fp, off_t *poff, st
if (vp->v_type != VCHR && count > LLONG_MAX - *poff)
return (EINVAL);
 
+   if (vp->v_type == VDIR)
+   return (EISDIR);
+
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
uio->uio_offset = *poff;
-   if (vp->v_type != VDIR)
-   error = VOP_READ(vp, uio,
-   (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
+   error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0,
+   cred);
*poff += count - uio->uio_resid;
VOP_UNLOCK(vp, p);
return (error);


-- 
jca | PGP: 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE



Re: ksh, ctrl-r followed by arrow key leaves "[D" or "[C" artifacts

2016-08-08 Thread Ingo Schwarze
Hi Martin,

Martin Natano wrote on Mon, Aug 08, 2016 at 08:21:50AM +0200:
> On Mon, Aug 08, 2016 at 03:33:23AM +0200, Ingo Schwarze wrote:

>> redirecting from misc@ to tech@ because i'm appending a patch
>> at the very end, lightly tested.
>> 
>> This has indeed been annoying me for years, but it never occurred
>> to me that i might be able to figure out what's going on.
>> Thanks for providing your analysis, i think it's spot on.
>> 
>> So the solution is to not swallow up that escape character, right?

> That's not always correct.  With your patch ksh now eats the next key
> you type when you exit the search prompt with the escape key.

According to the documentation, typing the escape key alone is not
a supported method of exiting the search prompt.  The ksh(1) manual
says:

search-history: ^R
Enter incremental search mode.  [...]
The abort key will leave search mode.

Above, the term "abort key" is defined as follows:

abort: ^C, ^G
Useful as a response to a request for a search-history
pattern in order to abort the search.

And indeed, Ctrl-C and Ctrl-G work as expected to abort search-history.

The search-history description does not say that the escape
key could be used to leave search mode, nor do i see such a statement
anywhere else; instead, the search-history description goes on as
follows:

Other commands will be executed after leaving search mode.

As i read that, commands like

backward-word: [n] ^[b
beginning-of-history: ^[<
capitalize-word: [n] ^[C, ^[c
comment: ^[#
complete: ^[^[
complete-file: ^[^X
complete-list: ^I, ^[=
delete-word-backward: [n] ERASE, ^[^?, ^[^H, ^[h
delete-word-forward: [n] ^[d
downcase-word: [n] ^[L, ^[l
end-of-history: ^[>
expand-file: ^[*
forward-word: [n] ^[
goto-history: [n] ^[g
list: ^[?
prev-hist-word: [n] ^[., ^[_
search-character-backward: [n] ^[^]
set-mark-command: ^[
upcase-word: [n] ^[U, ^[u
yank-pop: ^[y

qualify as "other commands", and i don't see why it should matter
whether i type both bytes almost instantaneously or whether there
is a noticeable delay between the two.

For that reason, i prefer my version of the patch to yours, and in
addition to being arguably better, it is certainly much simpler.
That said, i don't strongly object to yours, i also consider it
better than the current situation.

> The only way to know whether the escape is part of a longer
> sequence or was typed by the user

I don't understand at all why that should matter.

Yours,
  Ingo



Re: socket splice task

2016-08-08 Thread Alexander Bluhm
On Mon, Aug 08, 2016 at 11:52:58AM +0200, Martin Pieuchot wrote:
> On 07/30/16 00:17, Alexander Bluhm wrote:
> > Spliced TCP sockets become faster if we put the output part into
> > its own task thread.  This is inspired by userland copy where we
> > also have to go through the scheduler.  This gives the socket buffer
> > a chance to be filled up and tcp_output() is called less often and
> > with bigger chunks.
> 
> This is really interesting.  Do you have a clear understanding why is
> it getting faster?  I'm worried about introducing a hack just for socket
> splicing and would prefer a more generic solution if possible.

I ran a profiling kernel.  The first block is without my diff; the
second is with the task queue for splicing applied.

As you can see, the number of calls to the input functions is
roughly the same.

[6] 31.60.042.26   23694 tcp_input [6]
0.520.46   14253/29465   tcp_output [7]
0.000.63   14239/14244   sorwakeup [15]
0.000.429451/9451sowwakeup [18]
0.000.00   14239/21847   sbappendstream [487]
...


[8] 20.60.062.75   23712 tcp_input [8]
0.971.32   16470/16984   tcp_output [9]
0.000.00   14258/14367   sorwakeup [352]
0.000.009434/9504sowwakeup [378]
0.000.00   14254/14498   sbappendstream [416]
...

When looking at somove(9) the number of calls goes down from 23620
to 205 if it is running in a task.  Note that the sbsync calls are
equal which means the same number of mbufs are transferred.

0.000.00   1/23620   sosplice [182]
0.000.429449/23620   sowwakeup [18]
0.000.63   14170/23620   sorwakeup [15]
[8] 14.50.001.05   23620 somove [8]
0.001.05   15210/15220   tcp_usrreq [9]
0.000.017605/7605m_resethdr [74]
0.000.00   23620/250919  splassert_check [410]
0.000.00   14241/14242   sbsync [496]

0.000.00   1/205 sosplice [292]
0.000.08 204/205 sotask [46]
[45] 0.60.000.08 205 somove [45]
0.020.06 410/529 tcp_usrreq [39]
0.000.00 205/205 m_resethdr [339]
0.000.00   14147/14152   sbsync [708]
0.000.00 205/265679  splassert_check [626]

Especially tcp_usrreq() calling tcp_output() makes the difference.

[9] 14.40.001.05   15220 tcp_usrreq [9]
0.560.49   15212/29465   tcp_output [7]
...

[39] 0.70.020.08 529 tcp_usrreq [39]
0.030.04 504/16984   tcp_output [9]
...

The total number of tcp_output() calls is not reduced that much,
as we still have the ACKs triggered from tcp_input().

0.520.46   14253/29465   tcp_input [6]
0.560.49   15212/29465   tcp_usrreq [9]
[7] 27.91.080.95   29465 tcp_output [7]
0.440.50   23749/23750   ip_output [10]
...

0.030.04 504/16984   tcp_usrreq [39]
0.971.32   16470/16984   tcp_input [8]
[9] 17.31.001.36   16984 tcp_output [9]
0.420.84   21365/21370   ip_output [12]
...

So my idea is to delay tcp_output() a little bit to allow the socket
buffers to fill up.  Then we can use bulk operations.  The concept of
using a thread comes from the analogy with a userland copy.  There the
buffer fills until the relay process is scheduled.  Using a soft
interrupt would have the same effect.  We are running this task
diff in production for a year now.

bluhm



Re: relayd TLS session caching

2016-08-08 Thread Claudio Jeker
On Tue, Jul 19, 2016 at 03:32:13PM +0200, Claudio Jeker wrote:
> At the moment relayd's TLS session caching is a bit busted because
> the multiple relay processes do not share state.
> The following diff adds SSL session caching and sharing of the TLS ticket
> secrets. With this, openssl s_client -connect W.X.Y.Z:443 -reconnect
> reuses the connection after the first one.
> This should help TLS performance since no exchange with the ca process is
> needed if there is a cache hit.
> 
> The shared SSL session cache is in a new process (tlsc) but we still use
> the internal cache as well (which is populated on cache miss).
> 
> The TLS token secrets are generated on startup and a key rollover happens
> every 4h with a rekey window of 10min. I see no need to add tunables for
> this and also the shared SSL cache will just use the tls session cache
> settings (because OpenSSL will do the garbage collection of the external
> cache via the internal one).
> 
> This is the first version which seems to work for me when hammering relayd
> with openssl s_client.


Updated diff. This includes a diff by benno@ to disable TLS tickets and
does the caching a bit differently (removing the need for a delete ssl
callback and replacing it with a GC run in tlsc). The TLS cache
timeout will be used for both OpenSSL and relayd session caches.
The cache size therefore limits the size of the relayd session cache as
well, but in the worst case it could be up to prefork times (default 3)
bigger. Since the space needed is very small and anything modern should
use tickets anyway, I think this is acceptable.

-- 
:wq Claudio


Index: Makefile
===
RCS file: /cvs/src/usr.sbin/relayd/Makefile,v
retrieving revision 1.29
diff -u -p -r1.29 Makefile
--- Makefile21 Nov 2015 12:37:42 -  1.29
+++ Makefile19 Jul 2016 08:33:26 -
@@ -6,7 +6,7 @@ SRCS+=  agentx.c ca.c carp.c check_icmp.
check_tcp.c config.c control.c hce.c log.c name2id.c \
pfe.c pfe_filter.c pfe_route.c proc.c \
relay.c relay_http.c relay_udp.c relayd.c \
-   shuffle.c snmp.c ssl.c util.c
+   shuffle.c snmp.c ssl.c tlsc.c util.c
 MAN=   relayd.8 relayd.conf.5
 
 LDADD= -levent -lssl -lcrypto -lutil
Index: ca.c
===
RCS file: /cvs/src/usr.sbin/relayd/ca.c,v
retrieving revision 1.16
diff -u -p -r1.16 ca.c
--- ca.c5 Dec 2015 13:13:11 -   1.16
+++ ca.c19 Jul 2016 13:18:33 -
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -256,6 +257,7 @@ static int
 rsae_send_imsg(int flen, const u_char *from, u_char *to, RSA *rsa,
 int padding, u_int cmd)
 {
+   struct pollfdpfd[1];
struct ctl_keyop cko;
int  ret = 0;
objid_t *id;
@@ -292,9 +294,21 @@ rsae_send_imsg(int flen, const u_char *f
 * operation in OpenSSL's engine layer.
 */
imsg_composev(ibuf, cmd, 0, 0, -1, iov, cnt);
-   imsg_flush(ibuf);
+   if (imsg_flush(ibuf) == -1)
+   log_warn("rsae_send_imsg: imsg_flush");
 
+   pfd[0].fd = ibuf->fd;
+   pfd[0].events = POLLIN;
while (!done) {
+   switch (poll(pfd, 1, 5 * 1000)) {
+   case -1:
+   fatal("rsae_send_imsg: poll");
+   case 0:
+   log_warnx("rsae_send_imsg: poll timeout");
+   break;
+   default:
+   break;
+   }
if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
fatalx("imsg_read");
if (n == 0)
Index: config.c
===
RCS file: /cvs/src/usr.sbin/relayd/config.c,v
retrieving revision 1.27
diff -u -p -r1.27 config.c
--- config.c7 Dec 2015 04:03:27 -   1.27
+++ config.c18 Jul 2016 13:01:35 -
@@ -51,6 +51,7 @@ config_init(struct relayd *env)
ps->ps_what[PROC_CA] = CONFIG_RELAYS;
ps->ps_what[PROC_RELAY] = CONFIG_RELAYS|
CONFIG_TABLES|CONFIG_PROTOS|CONFIG_CA_ENGINE;
+   ps->ps_what[PROC_TLSC] = 0;
}
 
/* Other configuration */
Index: parse.y
===
RCS file: /cvs/src/usr.sbin/relayd/parse.y,v
retrieving revision 1.207
diff -u -p -r1.207 parse.y
--- parse.y 21 Jun 2016 21:35:25 -  1.207
+++ parse.y 22 Jul 2016 09:38:30 -
@@ -172,13 +172,13 @@ typedef struct {
 %token SOCKET SPLICE SSL STICKYADDR STYLE TABLE TAG TAGGED TCP TIMEOUT TLS TO
 %token ROUTER RTLABEL TRANSPARENT TRAP UPDATES URL VIRTUAL WITH TTL RTABLE
 %token MATCH PARAMS RANDOM LEASTSTATES SRCHASH KEY CERTIFICATE PASSWORD ECDH
-%token EDH CURVE
+%token EDH CURVE TICKETS
 %token   STRING
 %token  

Re: socket splice task

2016-08-08 Thread Alexander Bluhm
On Mon, Aug 08, 2016 at 12:17:30PM +0200, Martin Pieuchot wrote:
> On 07/30/16 02:41, Alexander Bluhm wrote:
> Are you sure it is not set?  Or does the scheduler keep selecting your
> task?

After some printf debugging I can say that it is not set.  The
scheduler is switching between softnet and sosplice tasks.  So
neither of the threads is going through roundrobin() twice.  User
land is not scheduled.

bluhm



zaurus/armish/armv7: move cpu_setup() call

2016-08-08 Thread Mark Kettenis
As patrick@ found earlier, on Cortex-A53 we have to enable the data
cache before doing any atomic operations.  Currently we do this in
cpu_configure(), which is really late, after several kernel subsystems
get initialized.  And some of those subsystems use mutexes, which use
atomic operations...

Patrick's solution was to just set the data cache enable bit in the cpu
control register early on in initarm().  That seems to work, but I'm
not entirely confident about this.  It means we run a significant
amount of code in the half-enabled cache state.  So here is a diff
that moves the cpu_setup() call to the tail of initarm() on all
OpenBSD/arm platforms.  This seems to work fine on armv7.  Can
somebody give this a spin on zaurus and/or armish?

Thanks,

Mark


Index: arm/arm/arm32_machdep.c
===
RCS file: /cvs/src/sys/arch/arm/arm/arm32_machdep.c,v
retrieving revision 1.48
diff -u -p -r1.48 arm32_machdep.c
--- arm/arm/arm32_machdep.c 31 Jan 2016 00:14:50 -  1.48
+++ arm/arm/arm32_machdep.c 8 Aug 2016 13:27:12 -
@@ -165,7 +165,7 @@ arm32_vector_init(vaddr_t va, int which)
 *
 * Note: This has to be done here (and not just in
 * cpu_setup()) because the vector page needs to be
-* accessible *before* cpu_startup() is called.
+* accessible *before* main() is called.
 * Think ddb(9) ...
 *
 * NOTE: If the CPU control register is not readable,
@@ -236,12 +236,6 @@ cpu_startup()
u_int loop;
paddr_t minaddr;
paddr_t maxaddr;
-
-   proc0paddr = (struct user *)kernelstack.pv_va;
-   proc0.p_addr = proc0paddr;
-
-   /* Set the cpu control register */
-   cpu_setup();
 
/* Lock down zero page */
vector_page_setprot(PROT_READ | PROT_EXEC);
Index: armv7/armv7/armv7_machdep.c
===
RCS file: /cvs/src/sys/arch/armv7/armv7/armv7_machdep.c,v
retrieving revision 1.34
diff -u -p -r1.34 armv7_machdep.c
--- armv7/armv7/armv7_machdep.c 30 Jul 2016 08:07:01 -  1.34
+++ armv7/armv7/armv7_machdep.c 8 Aug 2016 13:27:12 -
@@ -795,6 +795,8 @@ initarm(void *arg0, void *arg1, void *ar
 #endif
printf("board type: %u\n", board_id);
 
+   cpu_setup();
+
/* We return the new stack pointer address */
return(kernelstack.pv_va + USPACE_SVC_STACK_TOP);
 }
Index: armish/armish/armish_machdep.c
===
RCS file: /cvs/src/sys/arch/armish/armish/armish_machdep.c,v
retrieving revision 1.39
diff -u -p -r1.39 armish_machdep.c
--- armish/armish/armish_machdep.c  10 May 2015 15:56:28 -  1.39
+++ armish/armish/armish_machdep.c  8 Aug 2016 13:27:12 -
@@ -770,6 +770,8 @@ initarm(void *arg0, void *arg1, void *ar
Debugger();
 #endif
 
+   cpu_setup();
+
/* We return the new stack pointer address */
return(kernelstack.pv_va + USPACE_SVC_STACK_TOP);
 }
Index: zaurus/zaurus/zaurus_machdep.c
===
RCS file: /cvs/src/sys/arch/zaurus/zaurus/zaurus_machdep.c,v
retrieving revision 1.59
diff -u -p -r1.59 zaurus_machdep.c
--- zaurus/zaurus/zaurus_machdep.c  10 May 2015 15:56:28 -  1.59
+++ zaurus/zaurus/zaurus_machdep.c  8 Aug 2016 13:27:12 -
@@ -1110,6 +1110,8 @@ initarm(void *arg0, void *arg1, void *ar
Debugger();
 #endif
 
+   cpu_setup();
+
/* We return the new stack pointer address */
return(kernelstack.pv_va + USPACE_SVC_STACK_TOP);
 }



Re: uvm kentry free list: use an SLIST instead of RB abuse

2016-08-08 Thread Mark Kettenis
> From: David Gwynne 
> Date: Mon, 8 Aug 2016 23:14:13 +1000
> 
> > On 8 Aug 2016, at 10:46 PM, Mark Kettenis  wrote:
> > 
> >> Date: Mon, 8 Aug 2016 21:56:23 +1000
> >> From: David Gwynne 
> >> 
> >> the current tracking of free static map entries is done as hand
> >> rolled list manipulations using pointers in an rb_entry. it's really
> >> confusing to read.
> >> 
> >> since its simple list manipulations, this replaces the hand rolled
> >> code with an SLIST.
> >> 
> >> ok?
> > 
> > I like this.  How does this change the way entries are recycled?  The
> > new code will use the last entry that was freed.  Was this the case
> > with the old code as well?
> 
> the RB version put free items on the head (uvm.kentry_free) and removed them 
> from there too. the SLIST ops are the same in that respect.
> 
> i think the order that new entries from a fresh page are added in
> uvm_mapent_alloc is different, but that is not the hot path by any
> means.

I don't really worry about that path anyway.

Thanks for the explanation; ok kettenis@

> >> Index: uvm.h
> >> ===
> >> RCS file: /cvs/src/sys/uvm/uvm.h,v
> >> retrieving revision 1.60
> >> diff -u -p -r1.60 uvm.h
> >> --- uvm.h  8 Oct 2015 15:58:38 -   1.60
> >> +++ uvm.h  8 Aug 2016 11:53:57 -
> >> @@ -69,7 +69,7 @@ struct uvm {
> >>struct mutex aiodoned_lock;
> >> 
> >>/* static kernel map entry pool */
> >> -  vm_map_entry_t kentry_free; /* free page pool */
> >> +  SLIST_HEAD(, vm_map_entry) kentry_free; /* free page pool */
> >> 
> >>/* aio_done is locked by uvm.aiodoned_lock. */
> >>TAILQ_HEAD(, buf) aio_done; /* done async i/o reqs */
> >> Index: uvm_map.c
> >> ===
> >> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> >> retrieving revision 1.219
> >> diff -u -p -r1.219 uvm_map.c
> >> --- uvm_map.c  30 Jul 2016 16:43:44 -  1.219
> >> +++ uvm_map.c  8 Aug 2016 11:53:57 -
> >> @@ -1669,25 +1669,23 @@ uvm_mapent_alloc(struct vm_map *map, int
> >> 
> >>if (map->flags & VM_MAP_INTRSAFE || cold) {
> >>mtx_enter(&uvm_kmapent_mtx);
> >> -  me = uvm.kentry_free;
> >> -  if (me == NULL) {
> >> +  if (SLIST_EMPTY(&uvm.kentry_free)) {
> >>ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
> >>&kd_nowait);
> >>if (ne == NULL)
> >>panic("uvm_mapent_alloc: cannot allocate map "
> >>"entry");
> >> -  for (i = 0;
> >> -  i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
> >> -  i++)
> >> -  RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1];
> >> -  RB_LEFT(&ne[i], daddrs.addr_entry) = NULL;
> >> -  me = ne;
> >> +  for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
> >> +  SLIST_INSERT_HEAD(&uvm.kentry_free,
> >> +  &ne[i], daddrs.addr_kentry);
> >> +  }
> >>if (ratecheck(&uvm_kmapent_last_warn_time,
> >>&uvm_kmapent_warn_rate))
> >>printf("uvm_mapent_alloc: out of static "
> >>"map entries\n");
> >>}
> >> -  uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry);
> >> +  me = SLIST_FIRST(&uvm.kentry_free);
> >> +  SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
> >>uvmexp.kmapent++;
> >>mtx_leave(&uvm_kmapent_mtx);
> >>me->flags = UVM_MAP_STATIC;
> >> @@ -1725,8 +1723,7 @@ uvm_mapent_free(struct vm_map_entry *me)
> >> {
> >>if (me->flags & UVM_MAP_STATIC) {
> >>mtx_enter(&uvm_kmapent_mtx);
> >> -  RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free;
> >> -  uvm.kentry_free = me;
> >> +  SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
> >>uvmexp.kmapent--;
> >>mtx_leave(&uvm_kmapent_mtx);
> >>} else if (me->flags & UVM_MAP_KMEM) {
> >> @@ -2795,11 +2792,10 @@ uvm_map_init(void)
> >> 
> >>/* now set up static pool of kernel map entries ... */
> >>mtx_init(&uvm_kmapent_mtx, IPL_VM);
> >> -  uvm.kentry_free = NULL;
> >> +  SLIST_INIT(&uvm.kentry_free);
> >>for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
> >> -  RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) =
> >> -  uvm.kentry_free;
> >> -  uvm.kentry_free = &kernel_map_entry[lcv];
> >> +  SLIST_INSERT_HEAD(&uvm.kentry_free,
> >> +  &kernel_map_entry[lcv], daddrs.addr_kentry);
> >>}
> >> 
> >>/* initialize the map-related pools. */
> >> Index: uvm_map.h
> >> ===
> >> RCS file: /cvs/src/sys/uvm/uvm_map.h,v
> >> retrieving rev

Re: uvm kentry free list: use an SLIST instead of RB abuse

2016-08-08 Thread David Gwynne

> On 8 Aug 2016, at 10:46 PM, Mark Kettenis  wrote:
> 
>> Date: Mon, 8 Aug 2016 21:56:23 +1000
>> From: David Gwynne 
>> 
>> the current tracking of free static map entries is done as hand
>> rolled list manipulations using pointers in an rb_entry. it's really
>> confusing to read.
>> 
>> since its simple list manipulations, this replaces the hand rolled
>> code with an SLIST.
>> 
>> ok?
> 
> I like this.  How does this change the way entries are recycled?  The
> new code will use the last entry that was freed.  Was this the case
> with the old code as well?

the RB version put free items on the head (uvm.kentry_free) and removed them 
from there too. the SLIST ops are the same in that respect.

i think the order that new entries from a fresh page are added in 
uvm_mapent_alloc is different, but that is not the hot path by any means.

dlg

> 
> 
>> Index: uvm.h
>> ===
>> RCS file: /cvs/src/sys/uvm/uvm.h,v
>> retrieving revision 1.60
>> diff -u -p -r1.60 uvm.h
>> --- uvm.h8 Oct 2015 15:58:38 -   1.60
>> +++ uvm.h8 Aug 2016 11:53:57 -
>> @@ -69,7 +69,7 @@ struct uvm {
>>  struct mutex aiodoned_lock;
>> 
>>  /* static kernel map entry pool */
>> -vm_map_entry_t kentry_free; /* free page pool */
>> +SLIST_HEAD(, vm_map_entry) kentry_free; /* free page pool */
>> 
>>  /* aio_done is locked by uvm.aiodoned_lock. */
>>  TAILQ_HEAD(, buf) aio_done; /* done async i/o reqs */
>> Index: uvm_map.c
>> ===
>> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
>> retrieving revision 1.219
>> diff -u -p -r1.219 uvm_map.c
>> --- uvm_map.c30 Jul 2016 16:43:44 -  1.219
>> +++ uvm_map.c8 Aug 2016 11:53:57 -
>> @@ -1669,25 +1669,23 @@ uvm_mapent_alloc(struct vm_map *map, int
>> 
>>  if (map->flags & VM_MAP_INTRSAFE || cold) {
>>  mtx_enter(&uvm_kmapent_mtx);
>> -me = uvm.kentry_free;
>> -if (me == NULL) {
>> +if (SLIST_EMPTY(&uvm.kentry_free)) {
>>  ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
>>  &kd_nowait);
>>  if (ne == NULL)
>>  panic("uvm_mapent_alloc: cannot allocate map "
>>  "entry");
>> -for (i = 0;
>> -i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
>> -i++)
>> -RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1];
>> -RB_LEFT(&ne[i], daddrs.addr_entry) = NULL;
>> -me = ne;
>> +for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
>> +SLIST_INSERT_HEAD(&uvm.kentry_free,
>> +&ne[i], daddrs.addr_kentry);
>> +}
>>  if (ratecheck(&uvm_kmapent_last_warn_time,
>>  &uvm_kmapent_warn_rate))
>>  printf("uvm_mapent_alloc: out of static "
>>  "map entries\n");
>>  }
>> -uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry);
>> +me = SLIST_FIRST(&uvm.kentry_free);
>> +SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
>>  uvmexp.kmapent++;
>>  mtx_leave(&uvm_kmapent_mtx);
>>  me->flags = UVM_MAP_STATIC;
>> @@ -1725,8 +1723,7 @@ uvm_mapent_free(struct vm_map_entry *me)
>> {
>>  if (me->flags & UVM_MAP_STATIC) {
>>  mtx_enter(&uvm_kmapent_mtx);
>> -RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free;
>> -uvm.kentry_free = me;
>> +SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
>>  uvmexp.kmapent--;
>>  mtx_leave(&uvm_kmapent_mtx);
>>  } else if (me->flags & UVM_MAP_KMEM) {
>> @@ -2795,11 +2792,10 @@ uvm_map_init(void)
>> 
>>  /* now set up static pool of kernel map entries ... */
>>  mtx_init(&uvm_kmapent_mtx, IPL_VM);
>> -uvm.kentry_free = NULL;
>> +SLIST_INIT(&uvm.kentry_free);
>>  for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
>> -RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) =
>> -uvm.kentry_free;
>> -uvm.kentry_free = &kernel_map_entry[lcv];
>> +SLIST_INSERT_HEAD(&uvm.kentry_free,
>> +&kernel_map_entry[lcv], daddrs.addr_kentry);
>>  }
>> 
>>  /* initialize the map-related pools. */
>> Index: uvm_map.h
>> ===
>> RCS file: /cvs/src/sys/uvm/uvm_map.h,v
>> retrieving revision 1.55
>> diff -u -p -r1.55 uvm_map.h
>> --- uvm_map.h9 Sep 2015 23:33:37 -   1.55
>> +++ uvm_map.h8 Aug 2016 11:53:57 -
>> @@ -161,6 +161,7 @@ union vm_map_object {
>> struct vm_map_entry {
>>  uni

Re: uvm kentry free list: use an SLIST instead of RB abuse

2016-08-08 Thread Mark Kettenis
> Date: Mon, 8 Aug 2016 21:56:23 +1000
> From: David Gwynne 
> 
> the current tracking of free static map entries is done as hand
> rolled list manipulations using pointers in an rb_entry. it's really
> confusing to read.
> 
> since its simple list manipulations, this replaces the hand rolled
> code with an SLIST.
> 
> ok?

I like this.  How does this change the way entries are recycled?  The
new code will use the last entry that was freed.  Was this the case
with the old code as well?


> Index: uvm.h
> ===
> RCS file: /cvs/src/sys/uvm/uvm.h,v
> retrieving revision 1.60
> diff -u -p -r1.60 uvm.h
> --- uvm.h 8 Oct 2015 15:58:38 -   1.60
> +++ uvm.h 8 Aug 2016 11:53:57 -
> @@ -69,7 +69,7 @@ struct uvm {
>   struct mutex aiodoned_lock;
>  
>   /* static kernel map entry pool */
> - vm_map_entry_t kentry_free; /* free page pool */
> + SLIST_HEAD(, vm_map_entry) kentry_free; /* free page pool */
>  
>   /* aio_done is locked by uvm.aiodoned_lock. */
>   TAILQ_HEAD(, buf) aio_done; /* done async i/o reqs */
> Index: uvm_map.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.219
> diff -u -p -r1.219 uvm_map.c
> --- uvm_map.c 30 Jul 2016 16:43:44 -  1.219
> +++ uvm_map.c 8 Aug 2016 11:53:57 -
> @@ -1669,25 +1669,23 @@ uvm_mapent_alloc(struct vm_map *map, int
>  
>   if (map->flags & VM_MAP_INTRSAFE || cold) {
>   mtx_enter(&uvm_kmapent_mtx);
> - me = uvm.kentry_free;
> - if (me == NULL) {
> + if (SLIST_EMPTY(&uvm.kentry_free)) {
>   ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
>   &kd_nowait);
>   if (ne == NULL)
>   panic("uvm_mapent_alloc: cannot allocate map "
>   "entry");
> - for (i = 0;
> - i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
> - i++)
> - RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1];
> - RB_LEFT(&ne[i], daddrs.addr_entry) = NULL;
> - me = ne;
> + for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
> + SLIST_INSERT_HEAD(&uvm.kentry_free,
> + &ne[i], daddrs.addr_kentry);
> + }
>   if (ratecheck(&uvm_kmapent_last_warn_time,
>   &uvm_kmapent_warn_rate))
>   printf("uvm_mapent_alloc: out of static "
>   "map entries\n");
>   }
> - uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry);
> + me = SLIST_FIRST(&uvm.kentry_free);
> + SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
>   uvmexp.kmapent++;
>   mtx_leave(&uvm_kmapent_mtx);
>   me->flags = UVM_MAP_STATIC;
> @@ -1725,8 +1723,7 @@ uvm_mapent_free(struct vm_map_entry *me)
>  {
>   if (me->flags & UVM_MAP_STATIC) {
>   mtx_enter(&uvm_kmapent_mtx);
> - RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free;
> - uvm.kentry_free = me;
> + SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
>   uvmexp.kmapent--;
>   mtx_leave(&uvm_kmapent_mtx);
>   } else if (me->flags & UVM_MAP_KMEM) {
> @@ -2795,11 +2792,10 @@ uvm_map_init(void)
>  
>   /* now set up static pool of kernel map entries ... */
>   mtx_init(&uvm_kmapent_mtx, IPL_VM);
> - uvm.kentry_free = NULL;
> + SLIST_INIT(&uvm.kentry_free);
>   for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
> - RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) =
> - uvm.kentry_free;
> - uvm.kentry_free = &kernel_map_entry[lcv];
> + SLIST_INSERT_HEAD(&uvm.kentry_free,
> + &kernel_map_entry[lcv], daddrs.addr_kentry);
>   }
>  
>   /* initialize the map-related pools. */
> Index: uvm_map.h
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.h,v
> retrieving revision 1.55
> diff -u -p -r1.55 uvm_map.h
> --- uvm_map.h 9 Sep 2015 23:33:37 -   1.55
> +++ uvm_map.h 8 Aug 2016 11:53:57 -
> @@ -161,6 +161,7 @@ union vm_map_object {
>  struct vm_map_entry {
>   union {
>   RB_ENTRY(vm_map_entry)  addr_entry; /* address tree */
> + SLIST_ENTRY(vm_map_entry) addr_kentry;
>   } daddrs;
>  
>   union {
> 
> 



Re: Fix Wacom Intuos S 2 descriptor and make wsmouse work

2016-08-08 Thread Martin Pieuchot
On 08/08/16 13:31, Frank Groeneveld wrote:
> [...]
>> I afraid you'll need to match the two interfaces of your device,
>> something which is not trivial with the current framework.
> 
> Do you know of a driver that currently does this?

Search for UHIDEV_CLAIM_ALLREPORTID, at least upd(4) uses it.



uvm kentry free list: use an SLIST instead of RB abuse

2016-08-08 Thread David Gwynne
the current tracking of free static map entries is done as hand
rolled list manipulations using pointers in an rb_entry. it's really
confusing to read.

since it's simple list manipulations, this replaces the hand rolled
code with an SLIST.

ok?

Index: uvm.h
===
RCS file: /cvs/src/sys/uvm/uvm.h,v
retrieving revision 1.60
diff -u -p -r1.60 uvm.h
--- uvm.h   8 Oct 2015 15:58:38 -   1.60
+++ uvm.h   8 Aug 2016 11:53:57 -
@@ -69,7 +69,7 @@ struct uvm {
struct mutex aiodoned_lock;
 
/* static kernel map entry pool */
-   vm_map_entry_t kentry_free; /* free page pool */
+   SLIST_HEAD(, vm_map_entry) kentry_free; /* free page pool */
 
/* aio_done is locked by uvm.aiodoned_lock. */
TAILQ_HEAD(, buf) aio_done; /* done async i/o reqs */
Index: uvm_map.c
===
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.219
diff -u -p -r1.219 uvm_map.c
--- uvm_map.c   30 Jul 2016 16:43:44 -  1.219
+++ uvm_map.c   8 Aug 2016 11:53:57 -
@@ -1669,25 +1669,23 @@ uvm_mapent_alloc(struct vm_map *map, int
 
if (map->flags & VM_MAP_INTRSAFE || cold) {
mtx_enter(&uvm_kmapent_mtx);
-   me = uvm.kentry_free;
-   if (me == NULL) {
+   if (SLIST_EMPTY(&uvm.kentry_free)) {
ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
&kd_nowait);
if (ne == NULL)
panic("uvm_mapent_alloc: cannot allocate map "
"entry");
-   for (i = 0;
-   i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
-   i++)
-   RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1];
-   RB_LEFT(&ne[i], daddrs.addr_entry) = NULL;
-   me = ne;
+   for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
+   SLIST_INSERT_HEAD(&uvm.kentry_free,
+   &ne[i], daddrs.addr_kentry);
+   }
if (ratecheck(&uvm_kmapent_last_warn_time,
&uvm_kmapent_warn_rate))
printf("uvm_mapent_alloc: out of static "
"map entries\n");
}
-   uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry);
+   me = SLIST_FIRST(&uvm.kentry_free);
+   SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
uvmexp.kmapent++;
mtx_leave(&uvm_kmapent_mtx);
me->flags = UVM_MAP_STATIC;
@@ -1725,8 +1723,7 @@ uvm_mapent_free(struct vm_map_entry *me)
 {
if (me->flags & UVM_MAP_STATIC) {
mtx_enter(&uvm_kmapent_mtx);
-   RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free;
-   uvm.kentry_free = me;
+   SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
uvmexp.kmapent--;
mtx_leave(&uvm_kmapent_mtx);
} else if (me->flags & UVM_MAP_KMEM) {
@@ -2795,11 +2792,10 @@ uvm_map_init(void)
 
/* now set up static pool of kernel map entries ... */
mtx_init(&uvm_kmapent_mtx, IPL_VM);
-   uvm.kentry_free = NULL;
+   SLIST_INIT(&uvm.kentry_free);
for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
-   RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) =
-   uvm.kentry_free;
-   uvm.kentry_free = &kernel_map_entry[lcv];
+   SLIST_INSERT_HEAD(&uvm.kentry_free,
+   &kernel_map_entry[lcv], daddrs.addr_kentry);
}
 
/* initialize the map-related pools. */
Index: uvm_map.h
===
RCS file: /cvs/src/sys/uvm/uvm_map.h,v
retrieving revision 1.55
diff -u -p -r1.55 uvm_map.h
--- uvm_map.h   9 Sep 2015 23:33:37 -   1.55
+++ uvm_map.h   8 Aug 2016 11:53:57 -
@@ -161,6 +161,7 @@ union vm_map_object {
 struct vm_map_entry {
union {
RB_ENTRY(vm_map_entry)  addr_entry; /* address tree */
+   SLIST_ENTRY(vm_map_entry) addr_kentry;
} daddrs;
 
union {



Re: Fix Wacom Intuos S 2 descriptor and make wsmouse work

2016-08-08 Thread Frank Groeneveld
On Mon, Aug 08, 2016 at 11:12:02AM +0200, Martin Pieuchot wrote:
> > ...
> > Is the attached patch acceptable? Or would it be better to write a
> > separate driver, such as uwacom, that does the same as ums, except that
> > it doesn't call hidms_input, but implements that itself?
> 
> I believe that a new driver makes more sense.  Because if one wants to
> extend your work to fully support the drawing table it won't be able to
> do it in ums(4).

That's what I thought after building the current patch, thanks for
confirming this.

> I'm afraid you'll need to match the two interfaces of your device,
> something which is not trivial with the current framework.

Do you know of a driver that currently does this?

> Newer versions of libinput include some support for various Intuos
> devices, you might want to look at this if you haven't done it already.

Thanks for the hint, didn't know. I'll dig into this.

Frank



Re: ksh, ctrl-r followed by arrow key leaves "[D" or "[C" artifacts

2016-08-08 Thread Alexander Hall
On Mon, Aug 08, 2016 at 08:21:50AM +0200, Martin Natano wrote:
> On Mon, Aug 08, 2016 at 03:33:23AM +0200, Ingo Schwarze wrote:
> > Hi Dave,
> > 
> > redirecting from misc@ to tech@ because i'm appending a patch
> > at the very end, lightly tested.
> > 
> > This has indeed been annoying me for years, but it never occurred
> > to me that i might be able to figure out what's going on.
> > Thanks for providing your analysis, i think it's spot on.
> > 
> > So the solution is to not swallow up that escape character, right?
> 
> That's not always correct. With your patch ksh now eats the next key you
> type when you exit the search prompt with the escape key. The only way
> to know whether the escape is part of a longer sequence or was typed by
> the user is timing. I've sent a patch some time ago that checks if more
> bytes are available from the input descriptor to decide whether the esc
> was a single one or part of a sequence:
> https://marc.info/?l=openbsd-tech&m=141240304628749&w=2
> 
> Below an updated version of that patch.

Looks right and works fine for me. OK halex@

/Alexander

> 
> natano
> 
> 
> Index: edit.c
> ===
> RCS file: /cvs/src/bin/ksh/edit.c,v
> retrieving revision 1.53
> diff -u -p -r1.53 edit.c
> --- edit.c17 Mar 2016 23:33:23 -  1.53
> +++ edit.c8 Aug 2016 06:19:34 -
> @@ -10,6 +10,7 @@
>  
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -149,6 +150,16 @@ x_puts(const char *s)
>  {
>   while (*s != 0)
>   shf_putc(*s++, shl_out);
> +}
> +
> +int
> +x_avail(void)
> +{
> + struct pollfd pfd[1];
> +
> + pfd[0].fd = STDIN_FILENO;
> + pfd[0].events = POLLIN;
> + return poll(pfd, 1, 0) == 1;
>  }
>  
>  bool
> Index: edit.h
> ===
> RCS file: /cvs/src/bin/ksh/edit.h,v
> retrieving revision 1.11
> diff -u -p -r1.11 edit.h
> --- edit.h26 Jan 2016 17:39:31 -  1.11
> +++ edit.h8 Aug 2016 06:19:34 -
> @@ -39,6 +39,7 @@ int x_getc(void);
>  void x_flush(void);
>  void x_putc(int);
>  void x_puts(const char *);
> +int  x_avail(void);
>  bool x_mode(bool);
>  int  promptlen(const char *, const char **);
>  int  x_do_comment(char *, int, int *);
> Index: emacs.c
> ===
> RCS file: /cvs/src/bin/ksh/emacs.c,v
> retrieving revision 1.65
> diff -u -p -r1.65 emacs.c
> --- emacs.c   26 Jan 2016 17:39:31 -  1.65
> +++ emacs.c   8 Aug 2016 06:19:35 -
> @@ -893,9 +893,12 @@ x_search_hist(int c)
>   if ((c = x_e_getc()) < 0)
>   return KSTD;
>   f = kb_find_hist_func(c);
> - if (c == CTRL('['))
> + if (c == CTRL('[')) {
> + /* might be part of an escape sequence */
> + if (x_avail())
> + x_e_ungetc(c);
>   break;
> - else if (f == x_search_hist)
> + } else if (f == x_search_hist)
>   offset = x_search(pat, 0, offset);
>   else if (f == x_del_back) {
>   if (p == pat) {
> 
> 
> > 
> > Yours,
> >   Ingo
> > 
> > 
> > Dave Cohen wrote on Sun, Aug 07, 2016 at 04:52:50PM -0700:
> > 
> > > I'll try to describe an annoyance with my ksh setup.  Web and man
> > > page searching has not provided a solution.  I'm relatively new to
> > > both ksh and openbsd. I'm on version 5.9 release.
> > > 
> > > Problem happens when I navigate command history with ctrl-r, then
> > > use left or right arrow.  Hitting left arrow writes "[D", right
> > > inserts "[C".  I'm hitting the arrow keys so I can edit my prior
> > > command.  It's a habit I'm used to that works in bash.
> > > 
> > > For example to reproduce, let's say I ran "ls -l" but I wanted
> > > to run "ls -la"...
> > > 
> > > run the first command, "ls -l".
> > > 
> > > type "ctrl-r ls".  This works as expected, and my cursor is now
> > > in the middle of "ls -l".
> > > 
> > > type right arrow.  This is where the problem is.  The command I'm
> > > editing becomes "ls[C -l".
> > > 
> > > From this point, arrow keys work as expected.  I can use left or
> > > right to navigate and edit the command.
> > > 
> > > If, instead of arrows, I use ctrl-b or ctrl-f, these work fine.
> > > No artifacts like "[C" or "[D".
> > > 
> > > If I use bash instead of ksh, this problem does not occur.
> > > 
> > [...]
> > > I understand from `man ksh` that these key bindings are defaults:
> > >bind '^[[C'=forward-char
> > >bind '^[[D'=backward-char
> > > 
> > > My assumption is that when in ctrl-r mode, the '^[' is interpreted
> > > as part of the ctrl-r search (which doesn't match), then the '[C'
> > > or '[D' is interpreted as the next key (which is inserted).  Can
> > > this behavior be changed?
> > 
> > 
> > Index: emacs.c
> > ==

Re: nd6 router adv problem: valid lifetime is 0

2016-08-08 Thread Martin Pieuchot
On 08/06/16 00:00, Stuart Henderson wrote:
> An update on this.
> 
> Looking at flags after a hint from phrased - they're not setting the
> on-link flag (intentionally, to prevent flooding multicast NDs over the
> wlan, which makes sense) so it's hitting the XXX case in nd6_rtr.c which
> replaces lifetimes with 0.

But since Nov 2000, IPv6 autoconf address deletion is independent from
prefix lifetimes.  So I believe this check can go.


Index: netinet6/nd6_rtr.c
===
RCS file: /cvs/src/sys/netinet6/nd6_rtr.c,v
retrieving revision 1.140
diff -u -p -r1.140 nd6_rtr.c
--- netinet6/nd6_rtr.c	5 Jul 2016 10:17:14 -	1.140
+++ netinet6/nd6_rtr.c	8 Aug 2016 10:33:02 -
@@ -1249,19 +1249,6 @@ prelist_update(struct nd_prefix *new, st
 			goto end; /* we should just give up in this case. */
 		}
 
-		/*
-		 * XXX: from the ND point of view, we can ignore a prefix
-		 * with the on-link bit being zero.  However, we need a
-		 * prefix structure for references from autoconfigured
-		 * addresses.  Thus, we explicitly make sure that the prefix
-		 * itself expires now.
-		 */
-		if (newpr->ndpr_raf_onlink == 0) {
-			newpr->ndpr_vltime = 0;
-			newpr->ndpr_pltime = 0;
-			in6_init_prefix_ltimes(newpr);
-		}
-
 		pr = newpr;
 	}
 


Re: socket splice task

2016-08-08 Thread Mark Kettenis
> From: Martin Pieuchot 
> Date: Mon, 8 Aug 2016 12:17:30 +0200
> 
> On 07/30/16 02:41, Alexander Bluhm wrote:
> > On Fri, Jul 29, 2016 at 08:07:14PM -0400, Ted Unangst wrote:
> >> There's a sched_yield() in taskq_thread(). Something's not quite right.
> > 
> > It is a sched_pause() in taskq_thread().
> > 
> > If I replace my yield() with sched_pause(), the userland hangs
> > during splicing.  Looks like SPCF_SHOULDYIELD is not set.
> 
> Are you sure it is not set?  Or does the scheduler keeps selecting your
> task?

That would indeed be interesting to know.  As far as I can tell it
should be set.

SPCF_SHOULDYIELD only gets set if the process has been hogging the CPU
for more than a single tick (it's set the second time the process goes
through roundrobin()).  With a 100 Hz clock that might simply be too
much for smooth interactive behaviour, especially if there is a lot of
other high-priority kernel work to be done.



Re: socket splice task

2016-08-08 Thread Martin Pieuchot
On 07/30/16 02:41, Alexander Bluhm wrote:
> On Fri, Jul 29, 2016 at 08:07:14PM -0400, Ted Unangst wrote:
>> There's a sched_yield() in taskq_thread(). Something's not quite right.
> 
> It is a sched_pause() in taskq_thread().
> 
> If I replace my yield() with sched_pause(), the userland hangs
> during splicing.  Looks like SPCF_SHOULDYIELD is not set.

Are you sure it is not set?  Or does the scheduler keep selecting your
task?

> During my test I run "while date; do sleep 1; done" on the console.
> It does not make progress as long as data is spliced.



Re: socket splice task

2016-08-08 Thread Martin Pieuchot
On 07/30/16 01:09, Alexander Bluhm wrote:
> On Fri, Jul 29, 2016 at 06:46:52PM -0400, Ted Unangst wrote:
>> Alexander Bluhm wrote:
>>> +   /* Avoid user land starvation. */
>>> +   yield();
>>
>> you don't need to yield here, the task framework should do that for you.
> 
> Perhaps the framework should do that, but it does not.  When I run
> my splicing test on a qemu virtual machine with vio interfaces, an
> interactive shell hangs completely while data is getting spliced
> with full load.  The yield() keeps it interactive.

The difference is that you yield() after every somove(9) while
sched_pause() only fires after two roundrobin() calls which should be
~200ms.

> Adding a task increases througput for about 10%.  Without the yield()
> it might be a bit faster, but hanging userland is really annoying.

The approach used in if_netisr() is a bit different.  Instead of
scheduling one task per mbuf to process we use a single one with a
loop.  It would be interesting to see if this approach also exposes
the sched_yield() problem or if we're just lucky.

> I have tested it a year ago on real hardware, the result was the
> same.  Back then there was a discussion whether a softnet interrupt
> would be better.  It was a bit faster, but also could starve the
> userland.  Now we are heading for threads in the network stack, so
> I think a task with yield() is the best solution.

I am not sure adding a thread that yield() per subsystem is the best
solution.  That said I agree that executing more code in thread context
has its benefits but we're going to expose some bugs/limitations of our
current scheduling code.

So I think that a good understanding of the yield() problem in this case
is a step forward.



Re: RELRO: mwuauahhahaha

2016-08-08 Thread Mark Kettenis
> Date: Sun, 7 Aug 2016 20:06:39 -0700
> From: Philip Guenther 
> 
> On Sun, 7 Aug 2016, Philip Guenther wrote:
> > On Sun, 7 Aug 2016, Mark Kettenis wrote:
> > ...
> > > Going to give this a go on hppa to see if I can spot any issues there.
> > 
> > It works there just as well as before for me, with no new failures in my 
> > "try lots of ld option combinations" test.  Since it has an executable 
> > .plt, to make relro work will require a layout more like sparc64, with the 
> > .plt section hoisted to before the data.  I need to see what the 
> > limitations on .plt<-->.got distance are to see where .got goes for that.
> 
> New diff, adding relro support for hppa!  Yay!

So I looked a bit closer at this version instead ;).

Mostly like what I see.  It keeps .plt and .got together which is
indeed essential.  Moving those before .data will have some
consequences though since it reduces the amount of .data and .bss that
can be reached by short branches.  I believe the linker will emit long
branching stubs though.  I'll try to build some big stuff on hppa
here.  On the bright side, this makes my hack for emacs unnecessary
since .data and .bss are now together in a single segment.

Even if some of the bigger ports don't build anymore, this shouldn't
be a show-stopper.



Re: socket splice task

2016-08-08 Thread Martin Pieuchot
On 07/30/16 00:17, Alexander Bluhm wrote:
> Hi,
> 
> Spliced TCP sockets become faster if we put the output part into
> its own task thread.  This is inspired by userland copy where we
> also have to go through the scheduler.  This gives the socket buffer
> a chance to be filled up and tcp_output() is called less often and
> with bigger chunks.

This is really interesting.  Do you have a clear understanding of why
it is getting faster?  I'm worried about introducing a hack just for socket
splicing and would prefer a more generic solution if possible.

> 
> ok?
> 
> bluhm
> 
> Index: kern/uipc_socket.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.152
> diff -u -p -r1.152 uipc_socket.c
> --- kern/uipc_socket.c13 Jun 2016 21:24:43 -  1.152
> +++ kern/uipc_socket.c29 Jul 2016 15:44:33 -
> @@ -59,6 +59,7 @@ voidsbsync(struct sockbuf *, struct mbu
>  int  sosplice(struct socket *, int, off_t, struct timeval *);
>  void sounsplice(struct socket *, struct socket *, int);
>  void soidle(void *);
> +void sotask(void *);
>  int  somove(struct socket *, int);
>  
>  void filt_sordetach(struct knote *kn);
> @@ -85,6 +86,7 @@ int sominconn = SOMINCONN;
>  struct pool socket_pool;
>  #ifdef SOCKET_SPLICE
>  struct pool sosplice_pool;
> +struct taskq *sosplice_taskq;
>  #endif
>  
>  void
> @@ -1041,6 +1043,7 @@ sorflush(struct socket *so)
>  #define so_splicemax so_sp->ssp_max
>  #define so_idletvso_sp->ssp_idletv
>  #define so_idletoso_sp->ssp_idleto
> +#define so_splicetaskso_sp->ssp_task
>  
>  int
>  sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
> @@ -1049,6 +1052,10 @@ sosplice(struct socket *so, int fd, off_
>   struct socket   *sosp;
>   int  s, error = 0;
>  
> + if (sosplice_taskq == NULL)
> + sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET,
> + TASKQ_CANTSLEEP);
> +
>   if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
>   return (EPROTONOSUPPORT);
>   if (so->so_options & SO_ACCEPTCONN)
> @@ -1126,6 +1133,7 @@ sosplice(struct socket *so, int fd, off_
>   else
>   timerclear(&so->so_idletv);
>   timeout_set(&so->so_idleto, soidle, so);
> + task_set(&so->so_splicetask, sotask, so);
>  
>   /*
>* To prevent softnet interrupt from calling somove() while
> @@ -1149,6 +1157,7 @@ sounsplice(struct socket *so, struct soc
>  {
>   splsoftassert(IPL_SOFTNET);
>  
> + task_del(sosplice_taskq, &so->so_splicetask);
>   timeout_del(&so->so_idleto);
>   sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
>   so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
> @@ -1171,6 +1180,27 @@ soidle(void *arg)
>   splx(s);
>  }
>  
> +void
> +sotask(void *arg)
> +{
> + struct socket *so = arg;
> + int s;
> +
> + s = splsoftnet();
> + if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> + /*
> +  * We may not sleep here as sofree() and unsplice() may be
> +  * called from softnet interrupt context.  This would remove
> +  * the socket during somove().
> +  */
> + somove(so, M_DONTWAIT);
> + }
> + splx(s);
> +
> + /* Avoid user land starvation. */
> + yield();
> +}
> +
>  /*
>   * Move data from receive buffer of spliced source socket to send
>   * buffer of drain socket.  Try to move as much as possible in one
> @@ -1444,19 +1474,26 @@ somove(struct socket *so, int wait)
>   return (1);
>  }
>  
> -#undef so_splicelen
> -#undef so_splicemax
> -#undef so_idletv
> -#undef so_idleto
> -
>  #endif /* SOCKET_SPLICE */
>  
>  void
>  sorwakeup(struct socket *so)
>  {
>  #ifdef SOCKET_SPLICE
> - if (so->so_rcv.sb_flagsintr & SB_SPLICE)
> - (void) somove(so, M_DONTWAIT);
> + if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> + /*
> +  * TCP has a sendbuffer that can handle multiple packets
> +  * at once.  So queue the stream a bit to accumulate data.
> +  * The sosplice thread will call somove() later and send
> +  * the packets calling tcp_output() only once.
> +  * In the UDP case, send out the packets immediately.
> +  * Using a thread would make things slower.
> +  */
> + if (so->so_proto->pr_flags & PR_WANTRCVD)
> + task_add(sosplice_taskq, &so->so_splicetask);
> + else
> + somove(so, M_DONTWAIT);
> + }
>   if (isspliced(so))
>   return;
>  #endif
> @@ -1470,7 +1507,7 @@ sowwakeup(struct socket *so)
>  {
>  #ifdef SOCKET_SPLICE
>   if (so->so_snd.sb_flagsintr & SB_SPLICE)
> - (void) somove(so->so_sp->ssp_soback, M_DONTWAIT);
> + task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
>  #endif
>   sowakeup(so, &so-

Correct order of route removal

2016-08-08 Thread Martin Pieuchot
The rtable_walk() & prio bug I just sent a fix for should theoretically
not cause any trouble.  Sadly it is piled on top of another bug for
which a fix is attached.

When an interface is removed the current code starts by purging all its
corresponding route entries.  This is wrong because the per-AF code has
some knowledge of which automagic route should be removed first.

In other words, the rtable_walk() hang should never have been triggered
because the IPv4-specific code should take care of removing the
RTF_BROADCAST entry.  I believe that this ordering problem is the reason
why error codes are ignored in AF-specific code paths.

Diff attached fixes that, ok?
Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.436
diff -u -p -r1.436 if.c
--- net/if.c	13 Jul 2016 16:45:19 -	1.436
+++ net/if.c	22 Jul 2016 12:45:28 -
@@ -931,7 +931,6 @@ if_detach(struct ifnet *ifp)
 #if NBPFILTER > 0
 	bpfdetach(ifp);
 #endif
-	rt_if_remove(ifp);
 	rti_delete(ifp);
 #if NETHER > 0 && defined(NFSCLIENT)
 	if (ifp->if_index == revarp_ifidx)
@@ -944,6 +943,7 @@ if_detach(struct ifnet *ifp)
 #ifdef INET6
 	in6_ifdetach(ifp);
 #endif
+	rt_if_remove(ifp);
 #if NPF > 0
 	pfi_detach_ifnet(ifp);
 #endif
@@ -1931,15 +1931,15 @@ ifioctl(struct socket *so, u_long cmd, c
 			 */
 			if (up)
 if_down(ifp);
-			rt_if_remove(ifp);
 			rti_delete(ifp);
 #ifdef MROUTING
 			vif_delete(ifp);
 #endif
+			in_ifdetach(ifp);
 #ifdef INET6
 			in6_ifdetach(ifp);
 #endif
-			in_ifdetach(ifp);
+			rt_if_remove(ifp);
 			splx(s);
 		}
 


rtable_walk() hang and route prio

2016-08-08 Thread Martin Pieuchot
On the train back from n2k16 I found the real cause of the hang reported
by Dimitris Papastamos [0] and exposed by our recent
changes to the routing table.

When an interface is removed/detached the kernel deletes all the
corresponding route entries.  At this moment the interface is
DOWN and the corresponding route as well.  So the priority check
should consider that.

Without the diff attached or the workaround to stop iterating when
an error occurs, the kernel would loop forever since it can't remove
the RTF_BROADCAST entry.

[0] https://marc.info/?l=openbsd-bugs&m=146909621511954&w=2

ok?
Index: net/route.c
===
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.313
diff -u -p -r1.313 route.c
--- net/route.c	22 Jul 2016 11:03:30 -	1.313
+++ net/route.c	8 Aug 2016 09:33:15 -
@@ -873,7 +873,7 @@ rtrequest_delete(struct rt_addrinfo *inf
 	 * kernel.
 	 */
 	if ((rt->rt_flags & (RTF_LOCAL|RTF_BROADCAST)) &&
-	prio != RTP_LOCAL) {
+	(prio & RTP_MASK) != RTP_LOCAL) {
 		rtfree(rt);
 		return (EINVAL);
 	}


Re: Fix Wacom Intuos S 2 descriptor and make wsmouse work

2016-08-08 Thread Martin Pieuchot
On 08/07/16 19:30, Frank Groeneveld wrote:
> I recently acquired a new Wacom drawing tablet: an Intuos Draw, which
> seems to be called an Intuos S 2 internally. I couldn't get this tablet
> to work in OpenBSD. One of the HID descriptors describes a mouse, so
> ums(4) attaches to it, but no data is ever registered. After digging
> around in the Linux driver, it appears that the actual data is reported
> in a different descriptor, one that is (possibly on purpose) incorrect.
> 
> After writing a correct device descriptor I had to work around two other
> problems:
> 
> - X & Y data is reported in big endian format.
> - Mouse button 1 data is flakey, the linux driver uses pressure to work
>   around this.
> 
> To get the tablet to function correctly, I chose to implement quirks for
> this in hidms(4). Attached patch makes the tablet function with mouse
> movement and all three stylus events (tip touch and 2 buttons) without
> any needed configuration. This is enough functionality for me, I
> just use it as a mouse replacement to avoid getting RSI.
> 
> Example dmesg output on attachment:
> 
> uhidev0 at uhub3 port 2 configuration 1 interface 0 "Wacom Co.,Ltd.
> Intuos PS" rev 2.00/1.00 addr 3
> uhidev0: iclass 3/0, 192 report ids
> ums0 at uhidev0 reportid 16: 3 buttons
> wsmouse1 at ums0 mux 0
> uhid0 at uhidev0 reportid 192: input=4, output=0, feature=0
> uhidev1 at uhub3 port 2 configuration 1 interface 1 "Wacom Co.,Ltd.
> Intuos PS" rev 2.00/1.00 addr 3
> uhidev1: iclass 3/0, 3 report ids
> uhid1 at uhidev1 reportid 2: input=63, output=0, feature=0
> uhid2 at uhidev1 reportid 3: input=63, output=0, feature=0
> uhidev2 at uhub3 port 2 configuration 1 interface 2 "Wacom Co.,Ltd.
> Intuos PS" rev 2.00/1.00 addr 3
> uhidev2: iclass 3/1, 1 report id
> ums1 at uhidev2 reportid 1: 5 buttons
> wsmouse2 at ums1 mux 0
> 
> The first attached ums is the functioning one that is added by this
> patch.

Nice.

> Is the attached patch acceptable? Or would it be better to write a
> separate driver, such as uwacom, that does the same as ums, except that
> it doesn't call hidms_input, but implements that itself?

I believe that a new driver makes more sense, because if one wants to
extend your work to fully support the drawing tablet, one won't be able
to do it in ums(4).

I'm afraid you'll need to match the two interfaces of your device,
something which is not trivial with the current framework.

Newer versions of libinput include some support for various Intuos
devices, you might want to look at this if you haven't done it already.



Re: man9/config_attach.9: incorporate config_deactivate information

2016-08-08 Thread Martin Pieuchot
On 08/08/16 07:38, Ian Sutton wrote:
> On Sun, Aug 07, 2016 at 10:50:49PM -0600, Theo de Raadt wrote:
>> Your diff also contained:
>>
>> -.Fn config_detach "struct device *dev" "int flags"
>> +.Fn config_detach "struct device *dev"
> 
> Argh! I had initially given config_deactivate() a flags parameter,
> realized it lacked one in the code, and must have also accidentally
> removed it from _detach(). Patch below removes this error.
> Page also uploaded for convenience:
> 
> http://ce.gl/config_attach.9.txt
> 
>> I think you are reading NetBSD?
> 
> I have not used nor even tried any other BSDs.

I'm not sure this function should be documented.  Instead you could see
where it can be removed or replaced by config_deactivate_children(9).
This function also has the advantage of simplifying drivers since they
no longer need to keep an extra reference for their child.

That's the function that needs to be documented and IMHO I'd start a new
manual because it's not straightforward.

config_deactivate(9) is misleading on OpenBSD because as you've seen it
no longer matches its original design.  Hopefully we should be able to
make it private.


> Index: config_attach.9
> ===
> RCS file: /cvs/src/share/man/man9/config_attach.9,v
> retrieving revision 1.3
> diff -u -p -r1.3 config_attach.9
> --- config_attach.9   5 Dec 2014 16:55:53 -   1.3
> +++ config_attach.9   8 Aug 2016 05:31:34 -
> @@ -34,7 +34,8 @@
>  .Sh NAME
>  .Nm config_attach ,
>  .Nm config_detach ,
> -.Nm config_detach_children
> +.Nm config_detach_children ,
> +.Nm config_deactivate
>  .Nd attach and detach devices
>  .Sh SYNOPSIS
>  .In sys/param.h
> @@ -46,6 +47,8 @@
>  .Fn config_detach "struct device *dev" "int flags"
>  .Ft "int"
>  .Fn config_detach_children "struct device *parent" "int flags"
> +.Ft "int"
> +.Fn config_deactivate "struct device *dev"
>  .Sh DESCRIPTION
>  The
>  .Fn config_attach
> @@ -71,6 +74,21 @@ contains detachment flags:
>  #define  DETACH_FORCE0x01/* Force detachment; hardware
> gone */
>  #define  DETACH_QUIET0x02/* Don't print a notice */
>  .Ed
> +.Pp
> +The
> +.Fn config_deactivate
> +function is called by the parent to change the child device's
> operational state
> +by calling the driver's activate function with a
> +.Fa flags
> +argument describing the targeted operational state:
> +.Bd -literal
> +#define  DVACT_DEACTIVATE1   /* deactivate the device
> */
> +#define  DVACT_QUIESCE   2   /* warn the device about
> suspend */
> +#define  DVACT_SUSPEND   3   /* suspend the device */
> +#define  DVACT_RESUME4   /* resume the device */
> +#define  DVACT_WAKEUP5   /* tell device to
> recover after resume */
> +#define  DVACT_POWERDOWN 6   /* power device down */
> +.Ed
>  .Sh CONTEXT
>  .Fn config_detach
>  is always called from process context, allowing
> @@ -79,6 +97,8 @@ to be called while the device detaches i
>  which have a device open).
>  .Sh RETURN VALUES
>  .Fn config_detach
> -returns zero if successful and an error code otherwise.
> +and
> +.Fn config_deactivate
> +return zero if successful and an error code otherwise.
>  .Sh SEE ALSO
>  .Xr config_found 9
> 



Re: RELRO: static PIE support

2016-08-08 Thread Mark Kettenis
> Date: Sun, 7 Aug 2016 20:01:58 -0700
> From: Philip Guenther 
> 
> On Sun, 7 Aug 2016, Philip Guenther wrote:
> 
> > On Sun, Aug 7, 2016 at 4:45 AM, Mark Kettenis  
> > wrote:
> > >> Date: Sat, 6 Aug 2016 22:17:47 -0700
> > >> From: Philip Guenther 
> > >>
> > >> Parts of this were stolen from kettenis.
> > >>
> > >> Hey, wouldn't it be cool if static PIE executables took advantage of the
> > >> RELRO information too?
> > >
> > > Hmm.  The PLT doesn't have to be executable for static PIE
> > > executables.  So I think you could simply transition to PROT_READ like
> > > the old code does.
> > 
> > Perhaps we should make it PROT_NONE.  :-)
> 
> ...except that doesn't work for hppa, when I do relro with .plt and .got 
> together in the same section.  PROT_READ it is!
> 
> Tested on hppa with the relro update in the next message...

ok kettenis@, but

> Index: lib/csu/boot.h
> ===
> RCS file: /data/src/openbsd/src/lib/csu/boot.h,v
> retrieving revision 1.21
> diff -u -p -r1.21 boot.h
> --- lib/csu/boot.h7 Aug 2016 02:44:00 -   1.21
> +++ lib/csu/boot.h8 Aug 2016 03:00:53 -
> @@ -86,8 +86,6 @@ struct boot_dyn {
>   */
>  void _dl_boot_bind(const long, long *, Elf_Dyn *);
>  
> -extern char __plt_start[];
> -extern char __plt_end[];
>  extern char __got_start[];
>  extern char __got_end[];
>  
> @@ -106,6 +104,7 @@ _dl_boot_bind(const long sp, long *dl_da
>   longloff;
>   int prot_exec = 0;
>   RELOC_TYPE  *rp;
> + Elf_Phdr*phdp;
>   Elf_Addri;
>  
>   /*
> @@ -220,12 +219,29 @@ _dl_boot_bind(const long sp, long *dl_da
>   else
>   pagesize = 4096;
>  
> + /* do any RWX -> RX fixups for executable PLTs and apply GNU_RELRO */
> + phdp = (Elf_Phdr *)dl_data[AUX_phdr];
> + for (i = 0; i < dl_data[AUX_phnum]; i++, phdp++) {
> + switch (phdp->p_type) {
>  #if defined(__alpha__) || defined(__powerpc__) || defined(__sparc__) || \
> -defined(__sparc64__)
> - start = ELF_TRUNC((Elf_Addr)__plt_start, pagesize);
> - size = ELF_ROUND((Elf_Addr)__plt_end - start, pagesize);
> - mprotect((void *)start, size, PROT_READ);
> +defined(__sparc64__) || defined(__hppa__)


Could you keep that list of architectures sorted alphabetically?  Yes
I know, OCD and all that...

> + case PT_LOAD:
> + if ((phdp->p_flags & (PF_X | PF_W)) != (PF_X | PF_W))
> + break;
> + mprotect((void *)(phdp->p_vaddr + loff), phdp->p_memsz,
> + PROT_READ);
> + break;
>  #endif
> + case PT_GNU_RELRO:
> + mprotect((void *)(phdp->p_vaddr + loff), phdp->p_memsz,
> + PROT_READ);
> + /*
> +  * GNU_RELRO (a) covers the GOT, and (b) comes after
> +  * all LOAD sections, so if we found it then we're done
> +  */
> + return;
> + }
> + }
>  
>  #if defined(__powerpc__)
>   if (dynld.dt_proc[DT_PROC(DT_PPC_GOT)] == 0)
> 



Re: RELRO: support in ld.so

2016-08-08 Thread Mark Kettenis
> Date: Sat, 6 Aug 2016 18:58:57 -0700
> From: Philip Guenther 
> 
> Let's teach ld.so to look for a PT_GNU_RELRO section per object and, if 
> present, mprotect(PROT_READ) the range it covers *instead of* the 
> __got_start .. __got_end range.
> 
> Two interesting bits are in here:
>  1) we need to move up the handling of DT_DEBUG to before relocation is 
> done, so that the .dynamic segment can be covered by the PT_GNU_RELRO
> section.  That's the bulk of the loader.c diff, though diff shows it
> as moving the _dl_rtld() call and such *down* instead of moving
> the DT_DEBUG stuff *up*
> 
>  2) _dl_protect_segment() is used for both __got_start/end and 
> __plt_start/end.  The latter should be going away soonish, so
> to tell the calls apart I just test the 3rd character of the symbol.
> Gross, but it works and will go away once the PLT stuff is gone.
> As a plus, turning on LD_DEBUG will show whether GNU_RELRO is used.  
> For example, on my full-relro laptop:
> ...
> examining: '/usr/lib/libc.so.88.0'
>  flags /usr/libexec/ld.so = 0x0
> obj /usr/libexec/ld.so has vis as head
> protect start RELRO = 0x1557ed825b38 in /usr/lib/libc.so.88.0
> protect end RELRO = 0x1557ed827000 in /usr/lib/libc.so.88.0
> protect start RELRO = 0x155513801d88 in vis
> protect end RELRO = 0x155513802000 in vis
> StartEnd  Type Open Ref GrpRef Name
> 15551360 155513803000 exe  10   0  vis
> 1557ed56e000 1557ed839000 rlib 01   0  
> /usr/lib/libc.so.88.0
> 155751c0 155751c0 rtld 01   0  
> /usr/libexec/ld.so
> ...
> 
> 
> ok?

ok kettenis@

> Index: resolve.h
> ===
> RCS file: /data/src/openbsd/src/libexec/ld.so/resolve.h,v
> retrieving revision 1.78
> diff -u -p -r1.78 resolve.h
> --- resolve.h 4 Jul 2016 21:15:06 -   1.78
> +++ resolve.h 4 Jul 2016 21:15:18 -
> @@ -148,6 +148,10 @@ struct elf_object {
>   const void  *tls_static_data;
>   int tls_offset;
>  
> + /* relro bits */
> + Elf_Addrrelro_addr;
> + Elf_Addrrelro_size;
> +
>   /* generation number of last grpsym insert on this object */
>   unsigned int grpsym_gen;
>  
> Index: library.c
> ===
> RCS file: /data/src/openbsd/src/libexec/ld.so/library.c,v
> retrieving revision 1.77
> diff -u -p -r1.77 library.c
> --- library.c 4 Jul 2016 21:15:06 -   1.77
> +++ library.c 18 Jul 2016 12:41:48 -
> @@ -98,6 +98,7 @@ _dl_tryload_shlib(const char *libname, i
>   struct load_list *next_load, *load_list = NULL;
>   Elf_Addr maxva = 0, minva = ELFDEFNNAME(NO_ADDR);
>   Elf_Addr libaddr, loff, align = _dl_pagesz - 1;
> + Elf_Addr relro_addr = 0, relro_size = 0;
>   elf_object_t *object;
>   charhbuf[4096];
>   Elf_Dyn *dynp = NULL;
> @@ -281,6 +282,11 @@ _dl_tryload_shlib(const char *libname, i
>   phdp->p_memsz);
>   break;
>  
> + case PT_GNU_RELRO:
> + relro_addr = phdp->p_vaddr + loff;
> + relro_size = phdp->p_memsz;
> + break;
> +
>   default:
>   break;
>   }
> @@ -299,6 +305,8 @@ _dl_tryload_shlib(const char *libname, i
>   object->dev = sb.st_dev;
>   object->inode = sb.st_ino;
>   object->obj_flags |= flags;
> + object->relro_addr = relro_addr;
> + object->relro_size = relro_size;
>   _dl_set_sod(object->load_name, &object->sod);
>   if (ptls != NULL && ptls->p_memsz)
>   _dl_set_tls(object, ptls, libaddr, libname);
> Index: library_mquery.c
> ===
> RCS file: /data/src/openbsd/src/libexec/ld.so/library_mquery.c,v
> retrieving revision 1.54
> diff -u -p -r1.54 library_mquery.c
> --- library_mquery.c  4 Jul 2016 21:15:06 -   1.54
> +++ library_mquery.c  18 Jul 2016 12:41:54 -
> @@ -108,6 +108,7 @@ _dl_tryload_shlib(const char *libname, i
>   Elf_Addr load_end = 0;
>   Elf_Addr align = _dl_pagesz - 1, off, size;
>   Elf_Phdr *ptls = NULL;
> + Elf_Addr relro_addr = 0, relro_size = 0;
>   struct stat sb;
>   char hbuf[4096];
>  
> @@ -297,10 +298,15 @@ retry:
>   }
>  
>   phdp = (Elf_Phdr *)(hbuf + ehdr->e_phoff);
> - for (i = 0; i < ehdr->e_phnum; i++, phdp++)
> + for (i = 0; i < ehdr->e_phnum; i++, phdp++) {
>   if (phdp->p_type == PT_OPENBSD_RANDOMIZE)
>   _dl_randombuf((char *)(phdp->p_vaddr + LOFF),
>   phdp->p_memsz);
> + else if (phdp->p_type == PT_GNU_RELRO) {
> + relro_addr = phdp->p_vaddr + LOFF;
> +