Re: arm64 MP

2018-02-19 Thread Otto Moerbeek
On Tue, Feb 20, 2018 at 08:52:20AM +0100, Mark Kettenis wrote:

> > Date: Mon, 19 Feb 2018 13:49:48 +0100 (CET)
> > From: Mark Kettenis 
> > 
> > The diff below attempts to make the arm64 pmap "mpsafe" and enables MP
> > support.  This diff survived a full build on my Firefly-RK3399 board.
> > It also seems to work on the Overdrive 1000.  It might even work on
> > the Raspberry Pi 3.  I'd appreciate it if people could play with this
> > on the Raspberry Pi and other arm64 hardware they have.
> > 
> > I tried to follow a strategy for making the pmap "mpsafe" that more
> > closely resembles what we did for our other architectures, although I
> > looked at Dale Rahn's diff as well.  It seems I fixed at least some
> > bugs in the process.  But I'm sure there are bugs remaining.  I may
> > even have introduced new ones.  So do expect this to crash and burn,
> > or at least lock up.
> > 
> > I'm also interested in people testing the normal GENERIC kernel with
> > this diff, especially building ports.  There are some changes in there
> > that could affect non-MP as well.
> 
> Here is a slightly better diff that removes a potential lock ordering
> problem, removes a redundant splvm and moves some code around to avoid
> a level of indentation.  Gaining confidence, so I'm looking for ok's
> now.

I have been running you diff on my rpi (together with jsg firmware update).

My rpi has hung twice now while doing a make -j4 in the clang part of
the tree.

The last top screen shows it using swap (around 400M) and has three
c++ processes each with resident size around 200M.

I'll try this diff now.

-Otto

> 
> Index: arch/arm64/arm64/cpu.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/cpu.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 cpu.c
> --- arch/arm64/arm64/cpu.c31 Jan 2018 10:52:12 -  1.13
> +++ arch/arm64/arm64/cpu.c19 Feb 2018 12:28:05 -
> @@ -454,13 +454,8 @@ cpu_start_secondary(struct cpu_info *ci)
>  
>   spllower(IPL_NONE);
>  
> -#ifdef notyet
>   SCHED_LOCK(s);
>   cpu_switchto(NULL, sched_chooseproc());
> -#else
> - for (;;)
> - __asm volatile("wfe");
> -#endif
>  }
>  
>  void
> Index: arch/arm64/arm64/pmap.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
> retrieving revision 1.47
> diff -u -p -r1.47 pmap.c
> --- arch/arm64/arm64/pmap.c   31 Jan 2018 23:23:16 -  1.47
> +++ arch/arm64/arm64/pmap.c   19 Feb 2018 12:28:05 -
> @@ -165,6 +165,19 @@ struct mem_region *pmap_allocated = &pma
>  int pmap_cnt_avail, pmap_cnt_allocated;
>  uint64_t pmap_avail_kvo;
>  
> +static inline void
> +pmap_lock(struct pmap *pmap)
> +{
> + if (pmap != pmap_kernel())
> + mtx_enter(&pmap->pm_mtx);
> +}
> +
> +static inline void
> +pmap_unlock(struct pmap *pmap)
> +{
> + if (pmap != pmap_kernel())
> + mtx_leave(&pmap->pm_mtx);
> +}
>  
>  /* virtual to physical helpers */
>  static inline int
> @@ -362,6 +375,7 @@ pmap_vp_page_alloc(struct pool *pp, int 
>   void *v;
>  
>   kd.kd_waitok = ISSET(flags, PR_WAITOK);
> + kd.kd_trylock = ISSET(flags, PR_NOWAIT);
>   kd.kd_slowdown = slowdown;
>  
>   KERNEL_LOCK();
> @@ -439,14 +453,20 @@ pmap_enter_pv(struct pte_desc *pted, str
>   if (__predict_false(!pmap_initialized))
>   return;
>  
> + mtx_enter(&pg->mdpage.pv_mtx);
>   LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
>   pted->pted_va |= PTED_VA_MANAGED_M;
> + mtx_leave(&pg->mdpage.pv_mtx);
>  }
>  
>  void
>  pmap_remove_pv(struct pte_desc *pted)
>  {
> + struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte & PTE_RPGN);
> +
> + mtx_enter(&pg->mdpage.pv_mtx);
>   LIST_REMOVE(pted, pted_pv_list);
> + mtx_leave(&pg->mdpage.pv_mtx);
>  }
>  
>  int
> @@ -454,7 +474,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
>  {
>   struct pte_desc *pted;
>   struct vm_page *pg;
> - int s, error;
> + int error;
>   int cache = PMAP_CACHE_WB;
>   int need_sync = 0;
>  
> @@ -464,9 +484,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
>   cache = PMAP_CACHE_DEV;
>   pg = PHYS_TO_VM_PAGE(pa);
>  
> - /* MP - Acquire lock for this pmap */
> -
> - s = splvm();
> + pmap_lock(pm);
>   pted = pmap_vp_lookup(pm, va, NULL);
>   if (pted && PTED_VALID(pted)) {
>   pmap_remove_pted(pm, pted);
> @@ -535,8 +553,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
>  
>   error = 0;
>  out:
> - splx(s);
> - /* MP - free pmap lock */
> + pmap_unlock(pm);
>   return error;
>  }
>  
> @@ -550,6 +567,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
>   struct pte_desc *pted;
>   vaddr_t va;
>  
> + pmap_lock(pm);
>   for (va = sva; va < eva; va += PAGE_SIZE) {
>   pted = pmap_vp_lookup(pm, va, NULL);
>  
> @@ -564,6 +582,7 @@ pmap_remove

Re: arm64 MP

2018-02-19 Thread Mark Kettenis
> Date: Mon, 19 Feb 2018 13:49:48 +0100 (CET)
> From: Mark Kettenis 
> 
> The diff below attempts to make the arm64 pmap "mpsafe" and enables MP
> support.  This diff survived a full build on my Firefly-RK3399 board.
> It also seems to work on the Overdrive 1000.  It might even work on
> the Raspberry Pi 3.  I'd appreciate it if people could play with this
> on the Raspberry Pi and other arm64 hardware they have.
> 
> I tried to follow a strategy for making the pmap "mpsafe" that more
> closely resembles what we did for our other architectures, although I
> looked at Dale Rahn's diff as well.  It seems I fixed at least some
> bugs in the process.  But I'm sure there are bugs remaining.  I may
> even have introduced new ones.  So do expect this to crash and burn,
> or at least lock up.
> 
> I'm also interested in people testing the normal GENERIC kernel with
> this diff, especially building ports.  There are some changes in there
> that could affect non-MP as well.

Here is a slightly better diff that removes a potential lock ordering
problem, removes a redundant splvm and moves some code around to avoid
a level of indentation.  Gaining confidence, so I'm looking for ok's
now.

Index: arch/arm64/arm64/cpu.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/cpu.c,v
retrieving revision 1.13
diff -u -p -r1.13 cpu.c
--- arch/arm64/arm64/cpu.c  31 Jan 2018 10:52:12 -  1.13
+++ arch/arm64/arm64/cpu.c  19 Feb 2018 12:28:05 -
@@ -454,13 +454,8 @@ cpu_start_secondary(struct cpu_info *ci)
 
spllower(IPL_NONE);
 
-#ifdef notyet
SCHED_LOCK(s);
cpu_switchto(NULL, sched_chooseproc());
-#else
-   for (;;)
-   __asm volatile("wfe");
-#endif
 }
 
 void
Index: arch/arm64/arm64/pmap.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
retrieving revision 1.47
diff -u -p -r1.47 pmap.c
--- arch/arm64/arm64/pmap.c 31 Jan 2018 23:23:16 -  1.47
+++ arch/arm64/arm64/pmap.c 19 Feb 2018 12:28:05 -
@@ -165,6 +165,19 @@ struct mem_region *pmap_allocated = &pma
 int pmap_cnt_avail, pmap_cnt_allocated;
 uint64_t pmap_avail_kvo;
 
+static inline void
+pmap_lock(struct pmap *pmap)
+{
+   if (pmap != pmap_kernel())
+   mtx_enter(&pmap->pm_mtx);
+}
+
+static inline void
+pmap_unlock(struct pmap *pmap)
+{
+   if (pmap != pmap_kernel())
+   mtx_leave(&pmap->pm_mtx);
+}
 
 /* virtual to physical helpers */
 static inline int
@@ -362,6 +375,7 @@ pmap_vp_page_alloc(struct pool *pp, int 
void *v;
 
kd.kd_waitok = ISSET(flags, PR_WAITOK);
+   kd.kd_trylock = ISSET(flags, PR_NOWAIT);
kd.kd_slowdown = slowdown;
 
KERNEL_LOCK();
@@ -439,14 +453,20 @@ pmap_enter_pv(struct pte_desc *pted, str
if (__predict_false(!pmap_initialized))
return;
 
+   mtx_enter(&pg->mdpage.pv_mtx);
LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
pted->pted_va |= PTED_VA_MANAGED_M;
+   mtx_leave(&pg->mdpage.pv_mtx);
 }
 
 void
 pmap_remove_pv(struct pte_desc *pted)
 {
+   struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte & PTE_RPGN);
+
+   mtx_enter(&pg->mdpage.pv_mtx);
LIST_REMOVE(pted, pted_pv_list);
+   mtx_leave(&pg->mdpage.pv_mtx);
 }
 
 int
@@ -454,7 +474,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 {
struct pte_desc *pted;
struct vm_page *pg;
-   int s, error;
+   int error;
int cache = PMAP_CACHE_WB;
int need_sync = 0;
 
@@ -464,9 +484,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
cache = PMAP_CACHE_DEV;
pg = PHYS_TO_VM_PAGE(pa);
 
-   /* MP - Acquire lock for this pmap */
-
-   s = splvm();
+   pmap_lock(pm);
pted = pmap_vp_lookup(pm, va, NULL);
if (pted && PTED_VALID(pted)) {
pmap_remove_pted(pm, pted);
@@ -535,8 +553,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 
error = 0;
 out:
-   splx(s);
-   /* MP - free pmap lock */
+   pmap_unlock(pm);
return error;
 }
 
@@ -550,6 +567,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
struct pte_desc *pted;
vaddr_t va;
 
+   pmap_lock(pm);
for (va = sva; va < eva; va += PAGE_SIZE) {
pted = pmap_vp_lookup(pm, va, NULL);
 
@@ -564,6 +582,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
if (PTED_VALID(pted))
pmap_remove_pted(pm, pted);
}
+   pmap_unlock(pm);
 }
 
 /*
@@ -590,11 +609,12 @@ pmap_remove_pted(pmap_t pm, struct pte_d
pted->pted_va &= ~PTED_VA_EXEC_M;
}
 
-   pted->pted_pte = 0;
-
if (PTED_MANAGED(pted))
pmap_remove_pv(pted);
 
+   pted->pted_pte = 0;
+   pted->pted_va = 0;
+
if (pm != pmap_kernel())
pool_put(&pmap_pted_pool, pted);
splx(s);
@@ -615,10 +63

ifconfig: add -rdomain option

2018-02-19 Thread Ayaka Koshibe
Hi,

This diff would allow saying 'ifconfig foo -rdomain' instead of 'ifconfig foo 
rdomain 0'.

OK?

Thanks,
Ayaka


Index: sbin/ifconfig/ifconfig.8
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.292
diff -u -p -u -p -r1.292 ifconfig.8
--- sbin/ifconfig/ifconfig.816 Jan 2018 10:33:55 -  1.292
+++ sbin/ifconfig/ifconfig.820 Feb 2018 00:02:47 -
@@ -439,6 +439,9 @@ domains.
 If the specified rdomain does not yet exist it will be created, including
 a routing table with the same id.
 By default all interfaces belong to routing domain 0.
+.It Cm -rdomain
+If previously specified, remove the interface from the routing domain.
+The interface is returned to routing domain 0.
 .It Cm rtlabel Ar route-label
 (inet)
 Attach
Index: sbin/ifconfig/ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.353
diff -u -p -u -p -r1.353 ifconfig.c
--- sbin/ifconfig/ifconfig.c16 Jan 2018 10:33:55 -  1.353
+++ sbin/ifconfig/ifconfig.c20 Feb 2018 00:02:48 -
@@ -220,6 +220,7 @@ voidunsetvlandev(const char *, int);
 void   mpe_status(void);
 void   mpw_status(void);
 void   setrdomain(const char *, int);
+void   unsetrdomain(const char *, int);
 intprefix(void *val, int);
 void   getifgroups(void);
 void   setifgroup(const char *, int);
@@ -397,6 +398,7 @@ const structcmd {
{ "rtlabel",NEXTARG,0,  setifrtlabel },
{ "-rtlabel",   -1, 0,  setifrtlabel },
{ "rdomain",NEXTARG,0,  setrdomain },
+   { "-rdomain",   0,  0,  unsetrdomain },
{ "staticarp",  IFF_STATICARP,  0,  setifflags },
{ "-staticarp", -IFF_STATICARP, 0,  setifflags },
{ "mpls",   IFXF_MPLS,  0,  setifxflags },
@@ -5578,6 +5580,15 @@ setrdomain(const char *id, int param)
strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
ifr.ifr_rdomainid = rdomainid;
if (ioctl(s, SIOCSIFRDOMAIN, (caddr_t)&ifr) < 0)
+   warn("SIOCSIFRDOMAIN");
+}
+
+void
+unsetrdomain(const char *ignored, int alsoignored)
+{
+   strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
+   ifr.ifr_rdomainid = 0;
+   if (ioctl(s, SIOCSIFRDOMAIN, (caddr_t)&ifr) < 0)
warn("SIOCSIFRDOMAIN");
 }
 #endif



Re: update to xf86-video-ati 7.10.0

2018-02-19 Thread Matthieu Herrb
On Sat, Feb 17, 2018 at 07:55:08PM +1100, Jonathan Gray wrote:
> update from xf86-video-ati 7.7.1 to 7.10.0
> 
> 7.8.0 https://lists.x.org/archives/xorg-announce/2016-November/002738.html
> 7.9.0 https://lists.x.org/archives/xorg-announce/2017-March/002789.html
> 7.10.0
> https://lists.x.org/archives/xorg-announce/2017-September/002806.html

Hi,

this one seems to work ok on my various radeon cards:

macppc/r100:
radeondrm0 at pci2 dev 2 function 0 "ATI Radeon VE" rev 0x00

amd64/r600:

radeondrm0 at pci1 dev 5 function 0 "ATI Radeon HD 3300" rev 0x00

radeondrm0 at pci2 dev 0 function 0 "ATI Radeon HD 3650" rev 0x00

radeondrm0 at pci2 dev 0 function 0 "ATI Radeon Mobility HD 5430" rev 0x00

Thanks,
-- 
Matthieu Herrb



Re: sysctl(8): add -F option to print temps in freedom units

2018-02-19 Thread Mark Kettenis
> Date: Mon, 19 Feb 2018 11:11:19 -0600
> From: joshua stein 

You're joking right?

> Index: sbin/sysctl/sysctl.8
> ===
> RCS file: /cvs/src/sbin/sysctl/sysctl.8,v
> retrieving revision 1.214
> diff -u -p -u -p -r1.214 sysctl.8
> --- sbin/sysctl/sysctl.8  16 Feb 2018 07:27:07 -  1.214
> +++ sbin/sysctl/sysctl.8  19 Feb 2018 17:10:19 -
> @@ -68,6 +68,8 @@ flag; for the table values, the name of 
>  List all the currently available string or integer values.
>  This is the default, if no parameters are given to
>  .Nm .
> +.It Fl F
> +Print temperature fields in Fahrenheit rather than Celsius.
>  .It Fl n
>  Suppress printing of the field name, only output the field value.
>  Useful for setting shell variables.
> Index: sbin/sysctl/sysctl.c
> ===
> RCS file: /cvs/src/sbin/sysctl/sysctl.c,v
> retrieving revision 1.230
> diff -u -p -u -p -r1.230 sysctl.c
> --- sbin/sysctl/sysctl.c  16 Feb 2018 07:27:07 -  1.230
> +++ sbin/sysctl/sysctl.c  19 Feb 2018 17:10:19 -
> @@ -158,7 +158,7 @@ struct list secondlevel[] = {
>   { 0, 0 },   /* CTL_VFS */
>  };
>  
> -int  Aflag, aflag, nflag, qflag;
> +int  Aflag, aflag, Fflag, nflag, qflag;
>  
>  /*
>   * Variables requiring special processing.
> @@ -221,7 +221,7 @@ main(int argc, char *argv[])
>  {
>   int ch, lvl1;
>  
> - while ((ch = getopt(argc, argv, "Aanqw")) != -1) {
> + while ((ch = getopt(argc, argv, "AaFnqw")) != -1) {
>   switch (ch) {
>  
>   case 'A':
> @@ -232,6 +232,10 @@ main(int argc, char *argv[])
>   aflag = 1;
>   break;
>  
> + case 'F':
> + Fflag = 1;
> + break;
> +
>   case 'n':
>   nflag = 1;
>   break;
> @@ -2561,8 +2565,12 @@ print_sensor(struct sensor *s)
>   else {
>   switch (s->type) {
>   case SENSOR_TEMP:
> - printf("%.2f degC",
> - (s->value - 27315) / 100.0);
> + if (Fflag)
> + printf("%.2f degF",
> + (s->value * 1.8 - 45967) / 100.0);
> + else
> + printf("%.2f degC",
> + (s->value - 27315) / 100.0);
>   break;
>   case SENSOR_FANRPM:
>   printf("%lld RPM", s->value);
> 
> 



sysctl(8): add -F option to print temps in freedom units

2018-02-19 Thread joshua stein
Index: sbin/sysctl/sysctl.8
===
RCS file: /cvs/src/sbin/sysctl/sysctl.8,v
retrieving revision 1.214
diff -u -p -u -p -r1.214 sysctl.8
--- sbin/sysctl/sysctl.816 Feb 2018 07:27:07 -  1.214
+++ sbin/sysctl/sysctl.819 Feb 2018 17:10:19 -
@@ -68,6 +68,8 @@ flag; for the table values, the name of 
 List all the currently available string or integer values.
 This is the default, if no parameters are given to
 .Nm .
+.It Fl F
+Print temperature fields in Fahrenheit rather than Celsius.
 .It Fl n
 Suppress printing of the field name, only output the field value.
 Useful for setting shell variables.
Index: sbin/sysctl/sysctl.c
===
RCS file: /cvs/src/sbin/sysctl/sysctl.c,v
retrieving revision 1.230
diff -u -p -u -p -r1.230 sysctl.c
--- sbin/sysctl/sysctl.c16 Feb 2018 07:27:07 -  1.230
+++ sbin/sysctl/sysctl.c19 Feb 2018 17:10:19 -
@@ -158,7 +158,7 @@ struct list secondlevel[] = {
{ 0, 0 },   /* CTL_VFS */
 };
 
-intAflag, aflag, nflag, qflag;
+intAflag, aflag, Fflag, nflag, qflag;
 
 /*
  * Variables requiring special processing.
@@ -221,7 +221,7 @@ main(int argc, char *argv[])
 {
int ch, lvl1;
 
-   while ((ch = getopt(argc, argv, "Aanqw")) != -1) {
+   while ((ch = getopt(argc, argv, "AaFnqw")) != -1) {
switch (ch) {
 
case 'A':
@@ -232,6 +232,10 @@ main(int argc, char *argv[])
aflag = 1;
break;
 
+   case 'F':
+   Fflag = 1;
+   break;
+
case 'n':
nflag = 1;
break;
@@ -2561,8 +2565,12 @@ print_sensor(struct sensor *s)
else {
switch (s->type) {
case SENSOR_TEMP:
-   printf("%.2f degC",
-   (s->value - 27315) / 100.0);
+   if (Fflag)
+   printf("%.2f degF",
+   (s->value * 1.8 - 45967) / 100.0);
+   else
+   printf("%.2f degC",
+   (s->value - 27315) / 100.0);
break;
case SENSOR_FANRPM:
printf("%lld RPM", s->value);



Re: Mark setrtable(2) as NOLOCK

2018-02-19 Thread Mark Kettenis
> Date: Mon, 19 Feb 2018 16:22:30 +0100
> From: Martin Pieuchot 
> 
> Now that suser() is no longer messing with a per-process field, we
> can directly turn setrtable(2) as NOLOCK.
> 
> Apart from sanity checks this syscall writes an int-sized per-process
> field.  Is a memory barrier enough?
> 
> ok?

I'm wondering if we need a memory barrier here at all.  What race are
you trying to prevent here?

> Index: kern/syscalls.master
> ===
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.180
> diff -u -p -r1.180 syscalls.master
> --- kern/syscalls.master  12 Dec 2017 01:12:34 -  1.180
> +++ kern/syscalls.master  19 Feb 2018 15:05:34 -
> @@ -532,7 +532,7 @@
>  307  OBSOL   statfs53
>  308  OBSOL   fstatfs53
>  309  OBSOL   fhstatfs53
> -310  STD { int sys_setrtable(int rtableid); }
> +310  STD NOLOCK  { int sys_setrtable(int rtableid); }
>  311  STD NOLOCK  { int sys_getrtable(void); }
>  312  OBSOL   t32_getdirentries
>  313  STD { int sys_faccessat(int fd, const char *path, \
> Index: kern/uipc_syscalls.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.165
> diff -u -p -r1.165 uipc_syscalls.c
> --- kern/uipc_syscalls.c  19 Feb 2018 08:59:52 -  1.165
> +++ kern/uipc_syscalls.c  19 Feb 2018 15:19:35 -
> @@ -1189,6 +1190,9 @@ sys_setrtable(struct proc *p, void *v, r
>   return (EINVAL);
>  
>   p->p_p->ps_rtableid = (u_int)rtableid;
> + /* Force visibility */
> + membar_producer();
> +
>   return (0);
>  }
>  
> 
> 



Mark setrtable(2) as NOLOCK

2018-02-19 Thread Martin Pieuchot
Now that suser() is no longer messing with a per-process field, we
can directly turn setrtable(2) as NOLOCK.

Apart from sanity checks this syscall writes an int-sized per-process
field.  Is a memory barrier enough?

ok?

Index: kern/syscalls.master
===
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.180
diff -u -p -r1.180 syscalls.master
--- kern/syscalls.master12 Dec 2017 01:12:34 -  1.180
+++ kern/syscalls.master19 Feb 2018 15:05:34 -
@@ -532,7 +532,7 @@
 307OBSOL   statfs53
 308OBSOL   fstatfs53
 309OBSOL   fhstatfs53
-310STD { int sys_setrtable(int rtableid); }
+310STD NOLOCK  { int sys_setrtable(int rtableid); }
 311STD NOLOCK  { int sys_getrtable(void); }
 312OBSOL   t32_getdirentries
 313STD { int sys_faccessat(int fd, const char *path, \
Index: kern/uipc_syscalls.c
===
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.165
diff -u -p -r1.165 uipc_syscalls.c
--- kern/uipc_syscalls.c19 Feb 2018 08:59:52 -  1.165
+++ kern/uipc_syscalls.c19 Feb 2018 15:19:35 -
@@ -1189,6 +1190,9 @@ sys_setrtable(struct proc *p, void *v, r
return (EINVAL);
 
p->p_p->ps_rtableid = (u_int)rtableid;
+   /* Force visibility */
+   membar_producer();
+
return (0);
 }
 



enternewpgrp() & enterthispgrp()

2018-02-19 Thread Martin Pieuchot
Diff below shuffles the code used by sys_setsid() and sys_setpgid()
into two functions instead of one.

It is extracted from guenther@'s proctree diff and I'd like to get
it in to reduce the locking diff.

ok?

Index: kern/kern_proc.c
===
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.80
diff -u -p -r1.80 kern_proc.c
--- kern/kern_proc.c10 Feb 2018 10:32:51 -  1.80
+++ kern/kern_proc.c19 Feb 2018 14:46:06 -
@@ -73,6 +73,9 @@ struct pool ucred_pool;
 struct pool pgrp_pool;
 struct pool session_pool;
 
+void   pgdelete(struct pgrp *);
+void   fixjobc(struct process *, struct pgrp *, int);
+
 static void orphanpg(struct pgrp *);
 #ifdef DEBUG
 void pgrpdump(void);
@@ -222,67 +225,54 @@ zombiefind(pid_t pid)
 }
 
 /*
- * Move p to a new or existing process group (and session)
- * Caller provides a pre-allocated pgrp and session that should
- * be freed if they are not used.
- * XXX need proctree lock
+ * Move process to a new process group.  If a session is provided
+ * then it's a new session to contain this process group; otherwise
+ * the process is staying within its existing session.
  */
-int
-enterpgrp(struct process *pr, pid_t pgid, struct pgrp *newpgrp,
-struct session *newsess)
+void
+enternewpgrp(struct process *pr, struct pgrp *pgrp, struct session *newsess)
 {
-   struct pgrp *pgrp = pgfind(pgid);
-
 #ifdef DIAGNOSTIC
-   if (pgrp != NULL && newsess)/* firewalls */
-   panic("enterpgrp: setsid into non-empty pgrp");
if (SESS_LEADER(pr))
-   panic("enterpgrp: session leader attempted setpgrp");
+   panic("%s: session leader attempted setpgrp", __func__);
 #endif
-   if (pgrp == NULL) {
+
+   if (newsess != NULL) {
/*
-* new process group
+* New session.  Initialize it completely
 */
+   timeout_set(&newsess->s_verauthto, zapverauth, newsess);
+   newsess->s_leader = pr;
+   newsess->s_count = 1;
+   newsess->s_ttyvp = NULL;
+   newsess->s_ttyp = NULL;
+   memcpy(newsess->s_login, pr->ps_session->s_login,
+   sizeof(newsess->s_login));
+   atomic_clearbits_int(&pr->ps_flags, PS_CONTROLT);
+   pgrp->pg_session = newsess;
 #ifdef DIAGNOSTIC
-   if (pr->ps_pid != pgid)
-   panic("enterpgrp: new pgrp and pid != pgid");
-#endif
-
-   pgrp = newpgrp;
-   if (newsess) {
-   /*
-* new session
-*/
-   newsess->s_leader = pr;
-   newsess->s_count = 1;
-   newsess->s_ttyvp = NULL;
-   newsess->s_ttyp = NULL;
-   memcpy(newsess->s_login, pr->ps_session->s_login,
-   sizeof(newsess->s_login));
-   atomic_clearbits_int(&pr->ps_flags, PS_CONTROLT);
-   pgrp->pg_session = newsess;
-#ifdef DIAGNOSTIC
-   if (pr != curproc->p_p)
-   panic("enterpgrp: mksession but not curproc");
+   if (pr != curproc->p_p)
+   panic("%s: mksession but not curproc", __func__);
 #endif
-   } else {
-   pgrp->pg_session = pr->ps_session;
-   pgrp->pg_session->s_count++;
-   }
-   pgrp->pg_id = pgid;
-   LIST_INIT(&pgrp->pg_members);
-   LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
-   pgrp->pg_jobc = 0;
-   } else if (pgrp == pr->ps_pgrp) {
-   if (newsess)
-   pool_put(&session_pool, newsess);
-   pool_put(&pgrp_pool, newpgrp);
-   return (0);
} else {
-   if (newsess)
-   pool_put(&session_pool, newsess);
-   pool_put(&pgrp_pool, newpgrp);
+   pgrp->pg_session = pr->ps_session;
+   pgrp->pg_session->s_count++;
}
+   pgrp->pg_id = pr->ps_pid;
+   LIST_INIT(&pgrp->pg_members);
+   LIST_INSERT_HEAD(PGRPHASH(pr->ps_pid), pgrp, pg_hash);
+   pgrp->pg_jobc = 0;
+
+   enterthispgrp(pr, pgrp);
+}
+
+/*
+ * move process to an existing process group
+ */
+void
+enterthispgrp(struct process *pr, struct pgrp *pgrp)
+{
+   struct pgrp *savepgrp = pr->ps_pgrp;
 
/*
 * Adjust eligibility of affected pgrps to participate in job control.
@@ -290,14 +280,13 @@ enterpgrp(struct process *pr, pid_t pgid
 * could reach 0 spuriously during the first call.
 */
fixjobc(pr, pgrp, 1);
-   fixjobc(pr, pr->ps_pgrp, 0);
+   fixjobc(pr, savepgrp, 0);
 
LIST_REMOVE(pr, ps_pglist);
-   if (LIST_EMPTY(&pr->ps_pgrp->pg_members))
-  

Re: arm64 MP

2018-02-19 Thread Jonathan Gray
On Mon, Feb 19, 2018 at 01:49:48PM +0100, Mark Kettenis wrote:
> The diff below attempts to make the arm64 pmap "mpsafe" and enables MP
> support.  This diff survived a full build on my Firefly-RK3399 board.
> It also seems to work on the Overdrive 1000.  It might even work on
> the Raspberry Pi 3.  I'd appreciate it if people could play with this
> on the Raspberry Pi and other arm64 hardware they have.

On rpi3 with the dtb from raspberrypi-firmware-1.20171029

cpu0 at mainbus0 mpidr 0: ARM Cortex-A53 r0p4
...
cpu1 at mainbus0 mpidr 1: failed to spin up
cpu2 at mainbus0 mpidr 2: failed to spin up
cpu3 at mainbus0 mpidr 3: failed to spin up

hw.ncpu=1
hw.ncpufound=4

As there is no 'enable-method' or 'cpu-release-addr' on cpu nodes.

There is no newer release but with the repository as of
445b6a86faec72eaddb8c4386b87e385e1c338ac 'kernel: Bump to 4.14.18'
these are now present.

Node 0x3e60
name: 'cpus'
#address-cells: 0001
#size-cells: 
enable-method: 'brcm,bcm2836-smp'
phandle: 0073

Node 0x3ebc
name: 'cpu'
clock-frequency: 47868c00
device_type: 'cpu'
compatible: 'arm,cortex-a53'
reg: 
enable-method: 'spin-table'
cpu-release-addr: .00d8
phandle: 002a

cpu0 at mainbus0 mpidr 0: ARM Cortex-A53 r0p4
...
cpu1 at mainbus0 mpidr 1: ARM Cortex-A53 r0p4
cpu2 at mainbus0 mpidr 2: ARM Cortex-A53 r0p4
cpu3 at mainbus0 mpidr 3: ARM Cortex-A53 r0p4

hw.ncpu=4
hw.ncpufound=4

Index: Makefile
===
RCS file: /cvs/ports/sysutils/raspberrypi-firmware/Makefile,v
retrieving revision 1.6
diff -u -p -r1.6 Makefile
--- Makefile18 Nov 2017 03:54:11 -  1.6
+++ Makefile19 Feb 2018 13:37:21 -
@@ -3,8 +3,10 @@
 COMMENT=   Raspberry Pi firmware
 GH_ACCOUNT=raspberrypi
 GH_PROJECT=firmware
-GH_TAGNAME=1.20171029
-DISTNAME=  ${GH_ACCOUNT}-${GH_PROJECT}-${GH_TAGNAME}
+V= 1.20180210
+GH_COMMIT= 445b6a86faec72eaddb8c4386b87e385e1c338ac
+#DISTNAME= ${GH_ACCOUNT}-${GH_PROJECT}-${GH_TAGNAME}
+DISTNAME=  ${GH_ACCOUNT}-${GH_PROJECT}-${V}
 
 CATEGORIES=sysutils
 
Index: distinfo
===
RCS file: /cvs/ports/sysutils/raspberrypi-firmware/distinfo,v
retrieving revision 1.6
diff -u -p -r1.6 distinfo
--- distinfo18 Nov 2017 03:54:11 -  1.6
+++ distinfo19 Feb 2018 13:40:17 -
@@ -1,2 +1,2 @@
-SHA256 (raspberrypi-firmware-1.20171029.tar.gz) = 
Rs4oyNh+8ivcxXrBg2yj8E0exvRlgP9aML92s8CCIRc=
-SIZE (raspberrypi-firmware-1.20171029.tar.gz) = 120793811
+SHA256 (raspberrypi-firmware-1.20180210-445b6a86.tar.gz) = 
h5uHJz5R+O57YTPZ367uFXds9FH+S1L41gknjK/MnFY=
+SIZE (raspberrypi-firmware-1.20180210-445b6a86.tar.gz) = 121878108
Index: pkg/PLIST
===
RCS file: /cvs/ports/sysutils/raspberrypi-firmware/pkg/PLIST,v
retrieving revision 1.5
diff -u -p -r1.5 PLIST
--- pkg/PLIST   18 Aug 2017 02:58:53 -  1.5
+++ pkg/PLIST   19 Feb 2018 13:40:36 -
@@ -29,6 +29,7 @@ share/raspberrypi-firmware/boot/overlays
 share/raspberrypi-firmware/boot/overlays/allo-digione.dtbo
 share/raspberrypi-firmware/boot/overlays/allo-piano-dac-pcm512x-audio.dtbo
 share/raspberrypi-firmware/boot/overlays/allo-piano-dac-plus-pcm512x-audio.dtbo
+share/raspberrypi-firmware/boot/overlays/applepi-dac.dtbo
 share/raspberrypi-firmware/boot/overlays/at86rf233.dtbo
 share/raspberrypi-firmware/boot/overlays/audioinjector-addons.dtbo
 share/raspberrypi-firmware/boot/overlays/audioinjector-wm8731-audio.dtbo
@@ -43,10 +44,13 @@ share/raspberrypi-firmware/boot/overlays
 share/raspberrypi-firmware/boot/overlays/dwc2.dtbo
 share/raspberrypi-firmware/boot/overlays/enc28j60-spi2.dtbo
 share/raspberrypi-firmware/boot/overlays/enc28j60.dtbo
+share/raspberrypi-firmware/boot/overlays/exc3000.dtbo
 share/raspberrypi-firmware/boot/overlays/fe-pi-audio.dtbo
 share/raspberrypi-firmware/boot/overlays/goodix.dtbo
 share/raspberrypi-firmware/boot/overlays/googlevoicehat-soundcard.dtbo
+share/raspberrypi-firmware/boot/overlays/gpio-ir-tx.dtbo
 share/raspberrypi-firmware/boot/overlays/gpio-ir.dtbo
+share/raspberrypi-firmware/boot/overlays/gpio-key.dtbo
 share/raspberrypi-firmware/boot/overlays/gpio-poweroff.dtbo
 share/raspberrypi-firmware/boot/overlays/gpio-shutdown.dtbo
 share/raspberrypi-firmware/boot/overlays/hifiberry-amp.dtbo
@@ -72,11 +76,14 @@ share/raspberrypi-firmware/boot/overlays
 share/raspberrypi-firmware/boot/overlays/justboom-dac.dtbo
 share/raspberrypi-firmware/boot/overlays/justboom-digi.dtbo
 share/raspberrypi-firmware/boot/overlays/lirc-rpi.dtbo
+share/raspberrypi-firmware/boot/overlays/mbed-dac.dtbo
 share/raspberrypi-firmware/boot/overlays/mcp23017.dtbo
 share/raspberrypi-firmware/boot/overlays/mcp23s17.dtbo
 share/raspberrypi-firmware/boo

Re: default route and route socket filter on priority

2018-02-19 Thread Sebastian Benoit
Martin Pieuchot(m...@openbsd.org) on 2018.02.12 15:23:45 +0100:
> On 12/02/18(Mon) 12:02, Sebastian Benoit wrote:
> > routefilter currently filters the default route,
> > if it's priority is higher than the filter prio.
> 
> Then why not change the priority of the default route?
> 
> > This might not be a good idea - for example you might want to
> > redistribute a default route into ospf, for that you need it in the
> > routing table but it is configured with a low priority (high value) as
> > a route of last resort or maybe even as a reject/blackhole.
> 
> Are you using multiple default routes?  I don't understand why priority
> matter, a default route will always be the last resort.

Because we cannot guarantee that the default route has prio 8. For example
bgpd might inject the default route with prio 48, and you want to
redistribute it into ospf. Thats just how the interaction between our
routing daemons works.

I admit that this is a bit of a hack, because we kind of also want filters
for any prefix (because you could have "redistribute 8.8.8.0/24" in your
config) and route labels later on. I havent tackled that yet, so we only
filter on prio and the default route special case and turn of the prio
filter when we find a configuration that needs to see all routes.

We are trying to optimize the ammount of data going over the route socket
here. Shoveling information through it thats not needed is just useless
work for the cpus.
 
> > Treat the default route as a special case and always pass it.
> 
> I'm not fan of adding more complexity to route_input().

I understand that, but where else on the kernel side would you put filters?
If i continue with this, i'm happy to make the code cleaner.

> This function
> does not deal with address nor mask, it is just delivering already built
> messages.  Now if this is unavoidable it would be nice if isdefault()
> could be reused in if_group_routechange(), rt_ifa_addlocal() and
> rt_ifa_dellocal().

I saw that, but did not want to mix it into this diff. Will send one when
this gets in.

so ok?

> 
> > diff --git sys/net/rtsock.c sys/net/rtsock.c
> > index 70497d29a93..2e94fdbc0b1 100644
> > --- sys/net/rtsock.c
> > +++ sys/net/rtsock.c
> > @@ -110,7 +110,9 @@ int route_output(struct mbuf *, struct socket *, 
> > struct sockaddr *,
> >  introute_ctloutput(int, struct socket *, int, int, struct mbuf *);
> >  introute_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
> > struct mbuf *, struct proc *);
> > -void   route_input(struct mbuf *m0, struct socket *, sa_family_t);
> > +introute_is_default(struct sockaddr *, struct sockaddr *);
> > +void   route_input(struct mbuf *m0, struct socket *, struct 
> > rt_addrinfo *,
> > +   sa_family_t);
> >  introute_arp_conflict(struct rtentry *, struct rt_addrinfo *);
> >  introute_cleargateway(struct rtentry *, void *, unsigned int);
> >  void   route_senddesync(void *);
> > @@ -403,8 +405,33 @@ route_senddesync(void *data)
> > timeout_add(&rop->timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
> >  }
> >  
> > +int
> > +route_is_default(struct sockaddr *dst, struct sockaddr *mask)
> > +{
> > +   /* see if.c if_group_routechange() */
> > +   switch (dst->sa_family) {
> > +   case AF_INET:
> > +   if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
> > +   mask && (mask->sa_len == 0 ||
> > +   satosin(mask)->sin_addr.s_addr == INADDR_ANY))
> > +   return (1);
> > +   break;
> > +#ifdef INET6
> > +   case AF_INET6:
> > +   if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
> > +   &in6addr_any) && mask && (mask->sa_len == 0 ||
> > +   IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
> > +   &in6addr_any)))
> > +   return (1);
> > +   break;
> > +#endif
> > +   }
> > +   return (0);
> > +}
> > +
> >  void
> > -route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family)
> > +route_input(struct mbuf *m0, struct socket *so, struct rt_addrinfo *info,
> > +sa_family_t sa_family)
> >  {
> > struct routecb *rop;
> > struct rawcb *rp;
> > @@ -447,8 +474,16 @@ route_input(struct mbuf *m0, struct socket *so, 
> > sa_family_t sa_family)
> > if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 &&
> > !(rop->msgfilter & (1 << rtm->rtm_type)))
> > continue;
> > -   if (rop->priority != 0 && rop->priority < rtm->rtm_priority)
> > -   continue;
> > +   /* priority filter */
> > +   if (rop->priority != 0)
> > +   if ((info == NULL &&
> > +   rop->priority < rtm->rtm_priority) ||
> > +   (info != NULL &&
> > +   !route_is_default(info->rti_info[RTAX_DST],
> > +   info->rti_info[RTAX_NETMASK]) &&
> > +   rop->priority < rtm->rtm_prior

arm64 MP

2018-02-19 Thread Mark Kettenis
The diff below attempts to make the arm64 pmap "mpsafe" and enables MP
support.  This diff survived a full build on my Firefly-RK3399 board.
It also seems to work on the Overdrive 1000.  It might even work on
the Raspberry Pi 3.  I'd appreciate it if people could play with this
on the Raspberry Pi and other arm64 hardware they have.

I tried to follow a strategy for making the pmap "mpsafe" that more
closely resembles what we did for our other architectures, although I
looked at Dale Rahn's diff as well.  It seems I fixed at least some
bugs in the process.  But I'm sure there are bugs remaining.  I may
even have introduced new ones.  So do expect this to crash and burn,
or at least lock up.

I'm also interested in people testing the normal GENERIC kernel with
this diff, especially building ports.  There are some changes in there
that could affect non-MP as well.


Index: arch/arm64/arm64/cpu.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/cpu.c,v
retrieving revision 1.13
diff -u -p -r1.13 cpu.c
--- arch/arm64/arm64/cpu.c  31 Jan 2018 10:52:12 -  1.13
+++ arch/arm64/arm64/cpu.c  19 Feb 2018 12:28:05 -
@@ -454,13 +454,8 @@ cpu_start_secondary(struct cpu_info *ci)
 
spllower(IPL_NONE);
 
-#ifdef notyet
SCHED_LOCK(s);
cpu_switchto(NULL, sched_chooseproc());
-#else
-   for (;;)
-   __asm volatile("wfe");
-#endif
 }
 
 void
Index: arch/arm64/arm64/pmap.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
retrieving revision 1.47
diff -u -p -r1.47 pmap.c
--- arch/arm64/arm64/pmap.c 31 Jan 2018 23:23:16 -  1.47
+++ arch/arm64/arm64/pmap.c 19 Feb 2018 12:28:05 -
@@ -165,6 +165,19 @@ struct mem_region *pmap_allocated = &pma
 int pmap_cnt_avail, pmap_cnt_allocated;
 uint64_t pmap_avail_kvo;
 
+static inline void
+pmap_lock(struct pmap *pmap)
+{
+   if (pmap != pmap_kernel())
+   mtx_enter(&pmap->pm_mtx);
+}
+
+static inline void
+pmap_unlock(struct pmap *pmap)
+{
+   if (pmap != pmap_kernel())
+   mtx_leave(&pmap->pm_mtx);
+}
 
 /* virtual to physical helpers */
 static inline int
@@ -362,6 +375,7 @@ pmap_vp_page_alloc(struct pool *pp, int 
void *v;
 
kd.kd_waitok = ISSET(flags, PR_WAITOK);
+   kd.kd_trylock = ISSET(flags, PR_NOWAIT);
kd.kd_slowdown = slowdown;
 
KERNEL_LOCK();
@@ -439,14 +453,20 @@ pmap_enter_pv(struct pte_desc *pted, str
if (__predict_false(!pmap_initialized))
return;
 
+   mtx_enter(&pg->mdpage.pv_mtx);
LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
pted->pted_va |= PTED_VA_MANAGED_M;
+   mtx_leave(&pg->mdpage.pv_mtx);
 }
 
 void
 pmap_remove_pv(struct pte_desc *pted)
 {
+   struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte & PTE_RPGN);
+
+   mtx_enter(&pg->mdpage.pv_mtx);
LIST_REMOVE(pted, pted_pv_list);
+   mtx_leave(&pg->mdpage.pv_mtx);
 }
 
 int
@@ -454,7 +474,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 {
struct pte_desc *pted;
struct vm_page *pg;
-   int s, error;
+   int error;
int cache = PMAP_CACHE_WB;
int need_sync = 0;
 
@@ -464,9 +484,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
cache = PMAP_CACHE_DEV;
pg = PHYS_TO_VM_PAGE(pa);
 
-   /* MP - Acquire lock for this pmap */
-
-   s = splvm();
+   pmap_lock(pm);
pted = pmap_vp_lookup(pm, va, NULL);
if (pted && PTED_VALID(pted)) {
pmap_remove_pted(pm, pted);
@@ -535,8 +553,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 
error = 0;
 out:
-   splx(s);
-   /* MP - free pmap lock */
+   pmap_unlock(pm);
return error;
 }
 
@@ -550,6 +567,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
struct pte_desc *pted;
vaddr_t va;
 
+   pmap_lock(pm);
for (va = sva; va < eva; va += PAGE_SIZE) {
pted = pmap_vp_lookup(pm, va, NULL);
 
@@ -564,6 +582,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
if (PTED_VALID(pted))
pmap_remove_pted(pm, pted);
}
+   pmap_unlock(pm);
 }
 
 /*
@@ -590,11 +609,12 @@ pmap_remove_pted(pmap_t pm, struct pte_d
pted->pted_va &= ~PTED_VA_EXEC_M;
}
 
-   pted->pted_pte = 0;
-
if (PTED_MANAGED(pted))
pmap_remove_pv(pted);
 
+   pted->pted_pte = 0;
+   pted->pted_va = 0;
+
if (pm != pmap_kernel())
pool_put(&pmap_pted_pool, pted);
splx(s);
@@ -615,10 +635,6 @@ _pmap_kenter_pa(vaddr_t va, paddr_t pa, 
 {
pmap_t pm = pmap_kernel();
struct pte_desc *pted;
-   int s;
-
-   /* MP - lock pmap. */
-   s = splvm();
 
pted = pmap_vp_lookup(pm, va, NULL);
 
@@ -645,8 +661,6 @@ _pmap_kenter_pa(vaddr_t va, paddr_t pa, 
pmap_pte_insert(

Re: Fixed sys_socket() plumbing

2018-02-19 Thread Martin Pieuchot
On 12/02/18(Mon) 09:22, Martin Pieuchot wrote:
> I found my mistake in the previous diff.  `SS_NBIO' was never set on
> non blocking sockets.  Diff below fixes that by checking `nonblock'
> like it is done in sys_socketpair().
> 
> Tests & oks welcome :)

Anyone?

> Index: kern/uipc_syscalls.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.164
> diff -u -p -r1.164 uipc_syscalls.c
> --- kern/uipc_syscalls.c  11 Feb 2018 21:53:57 -  1.164
> +++ kern/uipc_syscalls.c  12 Feb 2018 08:19:30 -
> @@ -83,7 +83,7 @@ sys_socket(struct proc *p, void *v, regi
>   struct file *fp;
>   int type = SCARG(uap, type);
>   int domain = SCARG(uap, domain);
> - int fd, error;
> + int fd, cloexec, nonblock, fflag, error;
>   unsigned int ss = 0;
>  
>   if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6))
> @@ -95,24 +95,25 @@ sys_socket(struct proc *p, void *v, regi
>   if (error)
>   return (error);
>  
> - fdplock(fdp);
> - error = falloc(p, (type & SOCK_CLOEXEC) ? UF_EXCLOSE : 0, &fp, &fd);
> - fdpunlock(fdp);
> + type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS);
> + cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
> + nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
> + fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
> +
> + error = socreate(SCARG(uap, domain), &so, type, SCARG(uap, protocol));
>   if (error != 0)
>   goto out;
>  
> - fp->f_flag = FREAD | FWRITE | (type & SOCK_NONBLOCK ? FNONBLOCK : 0);
> - fp->f_type = DTYPE_SOCKET;
> - fp->f_ops = &socketops;
> - error = socreate(SCARG(uap, domain), &so,
> - type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS), SCARG(uap, 
> protocol));
> + fdplock(fdp);
> + error = falloc(p, cloexec, &fp, &fd);
> + fdpunlock(fdp);
>   if (error) {
> - fdplock(fdp);
> - fdremove(fdp, fd);
> - closef(fp, p);
> - fdpunlock(fdp);
> + soclose(so);
>   } else {
> - if (type & SOCK_NONBLOCK)
> + fp->f_flag = fflag;
> + fp->f_type = DTYPE_SOCKET;
> + fp->f_ops = &socketops;
> + if (nonblock)
>   so->so_state |= SS_NBIO;
>   so->so_state |= ss;
>   fp->f_data = so;
> @@ -451,7 +452,7 @@ sys_socketpair(struct proc *p, void *v, 
>  
>   type  = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
>   cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
> - nonblock = SCARG(uap, type) &  SOCK_NONBLOCK;
> + nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
>   fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
>  
>   error = socreate(SCARG(uap, domain), &so1, type, SCARG(uap, protocol));
> 



Re: mpath cloning routes and cloned routes

2018-02-19 Thread Martin Pieuchot
On 14/02/18(Wed) 21:53, Florian Riehm wrote:
> If we delete cloning routes, we also delete their cloned routes.
> This doesn't make sense if we delete a multipath cloning route and may result 
> in
> broken gateway routes:

That's a bug!

> # netstat -rn | grep 192.168.178
> default192.168.178.1  UGS5 4939 -12 iwn0 
> 192.168.178/24 192.168.178.52 UCPn   1   51 - 8 iwn0 
> 192.168.178/24 192.168.178.53 UCPn   00 - 8 iwn0 
> 192.168.178.1  34:31:c4:24:83:d4  UHLch  1  118 - 7 iwn0 
> 192.168.178.52 a4:4e:31:38:70:7c  UHLl   0 3749 - 1 iwn0 
> 192.168.178.53 a4:4e:31:38:70:7c  UHLl   00 - 1 iwn0 
> 192.168.178.255192.168.178.52 UHPb   00 - 1 iwn0 
> 192.168.178.255192.168.178.53 UHPb   00 - 1 iwn0 
> As you can see above, iwn0 has 192.168.178.52/24 and 192.168.178.53/24 
> assigned
> and therefore we have 2 mpath cloning routes (P). Their is a cloned route to
> 192.168.178.1 with RTF_CACHED (h) to reach the default gateway.
> 
> # ifconfig iwn0 inet 192.168.178.53 delete
> # netstat -rn | grep 192.168.178
> default192.168.178.1  UGS5 4955 -12 iwn0 
> 192.168.178/24 192.168.178.52 UCn0   51 - 8 iwn0 
> 192.168.178.52 a4:4e:31:38:70:7c  UHLl   0 3754 - 1 iwn0 
> 192.168.178.255192.168.178.52 UHb00 - 1 iwn0 
> Now 192.168.178.53/24 was deleted, therefore the cloned route to the gateway
> (192.168.178.1) is also gone and the default route is 'broken':
> 
> # ping 8.8.8.8
> # dmesg | tail
> arpresolve: 192.168.178.1: route contains no arp information
> arpresolve: 192.168.178.1: route contains no arp information
> arpresolve: 192.168.178.1: route contains no arp information
> arpresolve: 192.168.178.1: route contains no arp information
> 
> I think there is no need to delete cloned routes as long as we don't delete
> the last cloning route to a network.

Not flushing any RTF_CLONED is a too big hammer and will break setups
where the cloning routes are on two different interfaces.  

A better fix is to not delete RTF_CACHED routes until their own parent
is going away.  Diff below does that and includes a regression test.
Does that work for you?  ok?

Index: sys/net/route.c
===
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.371
diff -u -p -r1.371 route.c
--- sys/net/route.c 10 Feb 2018 09:17:56 -  1.371
+++ sys/net/route.c 19 Feb 2018 09:55:31 -
@@ -707,26 +707,31 @@ rtequal(struct rtentry *a, struct rtentr
 int
 rtflushclone1(struct rtentry *rt, void *arg, u_int id)
 {
-   struct rtentry *parent = arg;
+   struct rtentry *cloningrt = arg;
struct ifnet *ifp;
int error;
 
-   ifp = if_get(rt->rt_ifidx);
+   if (!ISSET(rt->rt_flags, RTF_CLONED))
+   return 0;
+
+   /* Cached route must stay alive as long as their parent are alive. */
+   if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt))
+   return 0;
 
+   if (!rtequal(rt->rt_parent, cloningrt))
+   return 0;
/*
 * This happens when an interface with a RTF_CLONING route is
 * being detached.  In this case it's safe to bail because all
 * the routes are being purged by rt_ifa_purge().
 */
+   ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
return 0;
 
-   if (ISSET(rt->rt_flags, RTF_CLONED) && rtequal(rt->rt_parent, parent)) {
-   error = rtdeletemsg(rt, ifp, id);
-   if (error == 0)
-   error = EAGAIN;
-   } else
-   error = 0;
+   error = rtdeletemsg(rt, ifp, id);
+   if (error == 0)
+   error = EAGAIN;
 
if_put(ifp);
return error;
Index: regress/sbin/route/Makefile
===
RCS file: /cvs/src/regress/sbin/route/Makefile,v
retrieving revision 1.27
diff -u -p -r1.27 Makefile
--- regress/sbin/route/Makefile 14 Feb 2018 08:42:22 -  1.27
+++ regress/sbin/route/Makefile 19 Feb 2018 09:45:43 -
@@ -376,7 +376,24 @@ rttest${n}:
${RCMD} add -net 192.168.67.128/25 130.102.71.67 -priority 3 -ifp vlan99
${RCMD} show -inet 2>&1 | \
sed -e "s,link\#[0-9 ]*U,link#  U," | \
-   cat > ${.CURDIR}/${.TARGET}.ok
+   diff -u ${.CURDIR}/${.TARGET}.ok /dev/stdin
+
+# Check that removing a RTF_CLONING route do not remove children from
+# another cloning route.
+n= 33
+RTTEST_TARGETS+:=rttest${n}
+rttest${n}:
+   # Use vether(4) because we need IFT_ETHER interfaces
+   # for the auto-magic RTF_CLONING routes.
+   ${SUDO} ifconfig vether99 rdomain ${RDOMAIN} lladdr fe: