Re: rdist/rdistd: use mode_t for file modes

2016-03-31 Thread Philip Guenther
On Wed, Mar 30, 2016 at 1:57 PM, Todd C. Miller
 wrote:
> The file mode is passed from client to server as a printf string
> formatted with %04o (unsigned) so use strtoul() not strtol() to
> parse it.  Error out on modes > 0.
>
> There is no way that the mode can ever be -1 so remove those checks.

Uh, yes there is: recvdir() passes mode=-1 to fchog(), which passes it
through to setfilemode().

I'm not sure fchog() is a great abstraction, with fd sometimes -1 and
mode sometimes -1.  Maybe the owner/group -> uid/gid mapping bit
should be broken out into a routine which can be shared by two
routines, one for the fd!=-1 case and one for the fd==-1 case?
Inlining and simplifying the setfilemode() and setownership() bits is
then easy.  I.e., more like how pax was written to do it.  :-)

Also dislike the lack of getpwnam()/getgrnam() caching.  Even a single
entry cache would be a big win for the common case.


Philip Guenther



Re: [patch] login_yubikey: delete keys

2016-03-31 Thread fritjof
On Thu, Mar 31, 2016 at 10:17:45PM +0200, Sebastian Benoit wrote:
> Hi Fritjof,
> 
> frit...@alokat.org(frit...@alokat.org) on 2016.03.31 11:43:58 +0200:
> > Wipe out the key from "user.key".
> > 
> > --f.
> > 
> The while loop above has return(AUTH_FAILED) so you dont zero in those
> cases. Can you change that?
> 

Yeah, sure. See patch below.

> 
> Also your diff does not apply, i think it has tab vs space issues.
> 

Ah, shit. Should work now.


Index: login_yubikey.c
===
RCS file: /cvs/src/libexec/login_yubikey/login_yubikey.c,v
retrieving revision 1.13
diff -u -r1.13 login_yubikey.c
--- login_yubikey.c 22 Oct 2015 23:56:30 -  1.13
+++ login_yubikey.c 31 Mar 2016 21:35:28 -
@@ -228,6 +228,8 @@
yubikey_hex_decode(uid, hexuid, YUBIKEY_UID_SIZE);
yubikey_hex_decode(key, hexkey, YUBIKEY_KEY_SIZE);
 
+   explicit_bzero(hexkey, sizeof(hexkey));
+
/*
 * Cycle through the key mapping table.
  * XXX brute force, unoptimized; a lookup table for valid mappings may
@@ -239,6 +241,7 @@
case EMSGSIZE:
syslog(LOG_INFO, "user %s failed: password too short.",
username);
+   explicit_bzero(key, sizeof(key));
return (AUTH_FAILED);
case EINVAL:/* keyboard mapping invalid */
continue;
@@ -264,14 +267,18 @@
syslog(LOG_INFO, "user %s: could not decode password "
"with any keymap (%d crc ok)",
username, crcok);
+   explicit_bzero(key, sizeof(key));
return (AUTH_FAILED);
default:
syslog(LOG_DEBUG, "user %s failed: %s",
username, strerror(r));
+   explicit_bzero(key, sizeof(key));
return (AUTH_FAILED);
}
break; /* only reached through the bottom of case 0 */
}
+
+   explicit_bzero(key, sizeof(key));
 
syslog(LOG_INFO, "user %s uid %s: %d matching keymaps (%d checked), "
"%d crc ok", username, hexuid, mapok, i, crcok);



Re: [patch] login_yubikey: delete keys

2016-03-31 Thread Sebastian Benoit
Hi Fritjof,

frit...@alokat.org(frit...@alokat.org) on 2016.03.31 11:43:58 +0200:
> Wipe out the key from "user.key".
> 
> --f.
> 
> Index: login_yubikey.c
> ===
> RCS file: /cvs/src/libexec/login_yubikey/login_yubikey.c,v
> retrieving revision 1.10
> diff -u -p -u -r1.10 login_yubikey.c
> --- login_yubikey.c 16 Jan 2015 06:39:50 -  1.10
> +++ login_yubikey.c 31 Mar 2016 09:38:01 -
> @@ -224,6 +224,8 @@ yubikey_login(const char *username, cons
> yubikey_hex_decode(uid, hexuid, YUBIKEY_UID_SIZE);
> yubikey_hex_decode(key, hexkey, YUBIKEY_KEY_SIZE);
>  
> +   explicit_bzero(hexkey, sizeof(hexkey));
> +
> /* 
>  * Cycle through the key mapping table.
>   * XXX brute force, unoptimized; a lookup table for valid mappings 
> may
> @@ -268,6 +270,8 @@ yubikey_login(const char *username, cons
> }
> break; /* only reached through the bottom of case 0 */
> }
> +
> +   explicit_bzero(key, sizeof(key));

The while loop above has return(AUTH_FAILED) so you dont zero in those
cases. Can you change that?

>  
> syslog(LOG_INFO, "user %s uid %s: %d matching keymaps (%d checked), "
> "%d crc ok", username, hexuid, mapok, i, crcok);
> 

Also your diff does not apply, i think it has tab vs space issues.



Re: [PATCH] Proposal to remove -f for arp(8) and ndp(8)

2016-03-31 Thread Sebastian Benoit
ok

Jeremie Courreges-Anglas(j...@wxcvbn.org) on 2016.03.31 19:16:14 +0200:
> Jeremie Courreges-Anglas  writes:
> 
> > Mike Belopuhov  writes:
> >
> >> Good day, Dimitris.
> >>
> >> Long time ago in a galaxy far far away I've been using this
> >> alongside the -F option that I've added.  While managed
> >> switches are becoming cheaper, I don't see a reason for a
> >> working feature to go away, especially since there has been
> >> zero rationale provided apart from "ndp -f" doesn't work.
> >> Well, then how about fixing ndp?
> > [...]
> >
> > Diff below, seems to work fine.
> 
> Now with the manpage improvements initially proposed by Dimitris.
> I have kept my ndp.c diff, because it is more consistent with the style
> of the rest of the file.  Dimitris's diff points out that main() never
> reports failures (exit(0)), it would be nice to fix.
> 
> ok?
> 
> Index: ndp.8
> ===
> RCS file: /cvs/src/usr.sbin/ndp/ndp.8,v
> retrieving revision 1.35
> diff -u -p -r1.35 ndp.8
> --- ndp.8 5 Oct 2015 10:25:19 -   1.35
> +++ ndp.8 31 Mar 2016 16:49:53 -
> @@ -117,6 +117,12 @@ Delete the specified NDP entry.
>  .It Fl f Ar filename
>  Parse the file specified by
>  .Ar filename .
> +Entries in the file should be of the form:
> +.Bd -ragged -offset indent -compact
> +.Ar nodename etheraddr
> +.Op Ar temp
> +.Op Ar proxy
> +.Ed
>  .It Fl H
>  Harmonize consistency between the routing table and the default router
>  list; install the top entry of the list into the kernel routing table.
> Index: ndp.c
> ===
> RCS file: /cvs/src/usr.sbin/ndp/ndp.c,v
> retrieving revision 1.69
> diff -u -p -r1.69 ndp.c
> --- ndp.c 26 Jan 2016 18:26:19 -  1.69
> +++ ndp.c 31 Mar 2016 16:49:53 -
> @@ -241,6 +241,11 @@ main(int argc, char *argv[])
>   }
>   delete(arg);
>   break;
> + case 'f':
> + if (argc != 0)
> + usage();
> + file(arg);
> + break;
>   case 'p':
>   if (argc != 0) {
>   usage();
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
> 

-- 



Re: [PATCH] Proposal to remove -f for arp(8) and ndp(8)

2016-03-31 Thread Jeremie Courreges-Anglas
Jeremie Courreges-Anglas  writes:

> Mike Belopuhov  writes:
>
>> Good day, Dimitris.
>>
>> Long time ago in a galaxy far far away I've been using this
>> alongside the -F option that I've added.  While managed
>> switches are becoming cheaper, I don't see a reason for a
>> working feature to go away, especially since there has been
>> zero rationale provided apart from "ndp -f" doesn't work.
>> Well, then how about fixing ndp?
> [...]
>
> Diff below, seems to work fine.

Now with the manpage improvements initially proposed by Dimitris.
I have kept my ndp.c diff, because it is more consistent with the style
of the rest of the file.  Dimitris's diff points out that main() never
reports failures (exit(0)), it would be nice to fix.

ok?

Index: ndp.8
===
RCS file: /cvs/src/usr.sbin/ndp/ndp.8,v
retrieving revision 1.35
diff -u -p -r1.35 ndp.8
--- ndp.8   5 Oct 2015 10:25:19 -   1.35
+++ ndp.8   31 Mar 2016 16:49:53 -
@@ -117,6 +117,12 @@ Delete the specified NDP entry.
 .It Fl f Ar filename
 Parse the file specified by
 .Ar filename .
+Entries in the file should be of the form:
+.Bd -ragged -offset indent -compact
+.Ar nodename etheraddr
+.Op Ar temp
+.Op Ar proxy
+.Ed
 .It Fl H
 Harmonize consistency between the routing table and the default router
 list; install the top entry of the list into the kernel routing table.
Index: ndp.c
===
RCS file: /cvs/src/usr.sbin/ndp/ndp.c,v
retrieving revision 1.69
diff -u -p -r1.69 ndp.c
--- ndp.c   26 Jan 2016 18:26:19 -  1.69
+++ ndp.c   31 Mar 2016 16:49:53 -
@@ -241,6 +241,11 @@ main(int argc, char *argv[])
}
delete(arg);
break;
+   case 'f':
+   if (argc != 0)
+   usage();
+   file(arg);
+   break;
case 'p':
if (argc != 0) {
usage();


-- 
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE



Re: Aq macro replacements

2016-03-31 Thread Ingo Schwarze
Hi Christian,

Christian Heckendorf wrote on Sun, Mar 27, 2016 at 11:03:02PM -0400:

>  - spamd-setup(8) is referring to pf tables. syslogd(8) and tcpdump(8)
>use Aq to format program I/O examples. None of these instances use
>mathematical unicode symbols in practice.

Fixed differently.

I committed your other four patches unchanged.

Thanks,
  Ingo

> [1] http://marc.info/?l=openbsd-tech=142399113828201=2



Re: increase v_specbitmap size (allow more cloned devices)

2016-03-31 Thread Todd C. Miller
On Thu, 31 Mar 2016 16:37:35 +0200, Martin Natano wrote:

> Sure; see the updated patch below. Ok?

OK millert@

 - todd



Re: increase v_specbitmap size (allow more cloned devices)

2016-03-31 Thread Mike Belopuhov
On 31 March 2016 at 16:37, Martin Natano  wrote:
> On Thu, Mar 31, 2016 at 06:02:07AM -0600, Todd C. Miller wrote:
>> On Thu, 31 Mar 2016 09:34:32 +0200, Martin Natano wrote:
>>
>> > Thank you all for the input. Allocatig the bitmap via malloc() really
>> > seems like the way to go, so we don't waste space for non-cloning
>> > devices. See updated patch below.
>> >
>> > Would it make sense to move the (rdev == VCHR && ...) condition to a
>> > macro in ? I figured it's only used twice, so I inlined
>> > it.
>>
>> I think it is fine to inline that but please put a set of parens
>> around the bitwise and part for readability.
>
> Sure; see the updated patch below. Ok?
>
> natano
>

Looks good to me. OK mikeb



Re: increase v_specbitmap size (allow more cloned devices)

2016-03-31 Thread Martin Natano
On Thu, Mar 31, 2016 at 06:02:07AM -0600, Todd C. Miller wrote:
> On Thu, 31 Mar 2016 09:34:32 +0200, Martin Natano wrote:
> 
> > Thank you all for the input. Allocatig the bitmap via malloc() really
> > seems like the way to go, so we don't waste space for non-cloning
> > devices. See updated patch below.
> > 
> > Would it make sense to move the (rdev == VCHR && ...) condition to a
> > macro in ? I figured it's only used twice, so I inlined
> > it.
> 
> I think it is fine to inline that but please put a set of parens
> around the bitwise and part for readability.

Sure; see the updated patch below. Ok?

natano


Index: sys/specdev.h
===
RCS file: /cvs/src/sys/sys/specdev.h,v
retrieving revision 1.34
diff -u -p -r1.34 specdev.h
--- sys/specdev.h   2 Nov 2013 00:16:31 -   1.34
+++ sys/specdev.h   31 Mar 2016 12:32:53 -
@@ -46,7 +46,7 @@ struct specinfo {
daddr_t si_lastr;
union {
struct vnode *ci_parent; /* pointer back to parent device */
-   u_int8_t ci_bitmap[8]; /* bitmap of devices cloned off us */
+   u_int8_t *ci_bitmap; /* bitmap of devices cloned off us */
} si_ci;
 };
 
@@ -71,6 +71,7 @@ struct cloneinfo {
  * This gives us 8 bits for encoding the real minor number.
  */
 #define CLONE_SHIFT8
+#define CLONE_MAP_SZ   128
 
 /*
  * Special device management
Index: kern/vfs_subr.c
===
RCS file: /cvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.240
diff -u -p -r1.240 vfs_subr.c
--- kern/vfs_subr.c 19 Mar 2016 12:04:15 -  1.240
+++ kern/vfs_subr.c 31 Mar 2016 12:32:54 -
@@ -555,7 +555,13 @@ loop:
nvp->v_specnext = *vpp;
nvp->v_specmountpoint = NULL;
nvp->v_speclockf = NULL;
-   memset(nvp->v_specbitmap, 0, sizeof(nvp->v_specbitmap));
+   nvp->v_specbitmap = NULL;
+   if (nvp_rdev == VCHR &&
+   (cdevsw[major(nvp_rdev)].d_flags & D_CLONE) &&
+   minor(nvp_rdev) >> CLONE_SHIFT == 0) {
+   nvp->v_specbitmap = malloc(CLONE_MAP_SZ, M_VNODE,
+   M_WAITOK | M_ZERO);
+   }
*vpp = nvp;
if (vp != NULLVP) {
nvp->v_flag |= VALIASED;
@@ -1092,6 +1098,11 @@ vgonel(struct vnode *vp, struct proc *p)
if (vq == NULL)
vx->v_flag &= ~VALIASED;
vp->v_flag &= ~VALIASED;
+   }
+   if (vp->v_rdev == VCHR &&
+   (cdevsw[major(vp->v_rdev)].d_flags & D_CLONE) &&
+   minor(vp->v_rdev) >> CLONE_SHIFT == 0) {
+   free(vp->v_specbitmap, M_VNODE, CLONE_MAP_SZ);
}
free(vp->v_specinfo, M_VNODE, sizeof(struct specinfo));
vp->v_specinfo = NULL;
Index: kern/spec_vnops.c
===
RCS file: /cvs/src/sys/kern/spec_vnops.c,v
retrieving revision 1.86
diff -u -p -r1.86 spec_vnops.c
--- kern/spec_vnops.c   19 Mar 2016 12:04:15 -  1.86
+++ kern/spec_vnops.c   31 Mar 2016 12:32:54 -
@@ -707,13 +707,14 @@ spec_open_clone(struct vop_open_args *ap
if (minor(vp->v_rdev) >= (1 << CLONE_SHIFT))
return (ENXIO);
 
-   for (i = 1; i < sizeof(vp->v_specbitmap) * NBBY; i++)
+   for (i = 1; i < CLONE_MAP_SZ * NBBY; i++) {
if (isclr(vp->v_specbitmap, i)) {
setbit(vp->v_specbitmap, i);
break;
}
+   }
 
-   if (i == sizeof(vp->v_specbitmap) * NBBY)
+   if (i == CLONE_MAP_SZ * NBBY)
return (EBUSY); /* too many open instances */
 
error = cdevvp(makedev(major(vp->v_rdev),



Re: increase v_specbitmap size (allow more cloned devices)

2016-03-31 Thread Martin Natano
On Thu, Mar 31, 2016 at 12:31:14PM +, Miod Vallat wrote:
> 
> > Thank you all for the input. Allocatig the bitmap via malloc() really
> > seems like the way to go, so we don't waste space for non-cloning
> > devices. See updated patch below.
> >
> > Would it make sense to move the (rdev == VCHR && ...) condition to a
> > macro in ? I figured it's only used twice, so I inlined
> > it.
> >
> > Comments/Ok?
> 
> You can get rid of the `clonable VCHR device' test in vgonel() and call
> free() v_specbitmap unconditionally.
> 

Unfortunately, this is not possible. v_specbitmap is a #define that
resolves to a member in a union, so it could be mangled in vnodes that
don't represent clonable VCHR devices.

The relevant parts from :

struct specinfo {
[...]
union {
struct vnode *ci_parent; /* pointer back to parent device */
u_int8_t *ci_bitmap; /* bitmap of devices cloned off us */
} si_ci;
};
[...]
#define v_specbitmap v_specinfo->si_ci.ci_bitmap


natano



Re: increase v_specbitmap size (allow more cloned devices)

2016-03-31 Thread Miod Vallat

> Thank you all for the input. Allocatig the bitmap via malloc() really
> seems like the way to go, so we don't waste space for non-cloning
> devices. See updated patch below.
>
> Would it make sense to move the (rdev == VCHR && ...) condition to a
> macro in ? I figured it's only used twice, so I inlined
> it.
>
> Comments/Ok?

You can get rid of the `clonable VCHR device' test in vgonel() and call
free() v_specbitmap unconditionally.



Re: increase v_specbitmap size (allow more cloned devices)

2016-03-31 Thread Todd C. Miller
On Thu, 31 Mar 2016 09:34:32 +0200, Martin Natano wrote:

> Thank you all for the input. Allocatig the bitmap via malloc() really
> seems like the way to go, so we don't waste space for non-cloning
> devices. See updated patch below.
> 
> Would it make sense to move the (rdev == VCHR && ...) condition to a
> macro in ? I figured it's only used twice, so I inlined
> it.

I think it is fine to inline that but please put a set of parens
around the bitwise and part for readability.

 - todd



Re: move "privileged port" check out of in(6)_pcbaddrisavail()

2016-03-31 Thread Alexander Bluhm
On Wed, Mar 30, 2016 at 10:44:14PM +0200, Vincent Gross wrote:
> This diff moves the "are we binding to a privileged port while not being root 
> ?"
> check from in(6)_pcbaddrisavail() to in_pcbbind().

> --- sys/netinet/in_pcb.c  26 Mar 2016 21:56:04 -  1.198
> +++ sys/netinet/in_pcb.c  30 Mar 2016 20:33:00 -
> @@ -341,9 +341,14 @@ in_pcbbind(struct inpcb *inp, struct mbu
>   }
>   }
>  
> - if (lport == 0)
> + if (lport == 0) {
>   if ((error = in_pcbpickport(, wild, inp, p)))
>   return (error);
> + } else {
> + if (ntohs(lport) < IPPORT_RESERVED &&
> + (error = suser(p, 0)))
> + return (EACCES);
> + }
>   inp->inp_lport = lport;

At this point inp has already been modified.  So when we bail out
with EACCES here, we have a partially successful system call.

Move the assignments
inp->inp_laddr6 = sin6->sin6_addr;
inp->inp_laddr = sin->sin_addr;
down after the return (EACCES).

Looks like that return (error) was wrong before.

bluhm



bgpd: dispatch_rtmsg_addr[change] mpath route not found

2016-03-31 Thread Peter Hessler
We see occasional bgpd deaths during boot.  This is apparently caused by
a race with ospfd starting up.

The underlying problem is we are reciving a CHANGE route message for an
MPATH path, where the gateway doesn't match the route message.  In our
case, the prefix is assigned to a Connected (and Backup/Down) carp
device, and we are reciving routes for it from several of our ospf
neighbors.

ospfd simply treats those as an "add" instead of a "change".  I would
like to do the same thing in bgpd.

OK?


Index: usr.sbin/bgpd/kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.207.2.1
diff -u -p -u -p -r1.207.2.1 kroute.c
--- usr.sbin/bgpd/kroute.c  23 Mar 2016 13:41:37 -  1.207.2.1
+++ usr.sbin/bgpd/kroute.c  31 Mar 2016 10:37:05 -
@@ -3251,7 +3251,7 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt
(kr = kroute_matchgw(kr, sa_in)) == NULL) {
log_warnx("dispatch_rtmsg_addr[change] "
"mpath route not found");
-   return (-1);
+   goto add4;
} else if (mpath && rtm->rtm_type == RTM_ADD)
goto add4;
 
@@ -3323,7 +3323,7 @@ add4:
NULL) {
log_warnx("dispatch_rtmsg[change] "
"IPv6 mpath route not found");
-   return (-1);
+   goto add6;
} else if (mpath && rtm->rtm_type == RTM_ADD)
goto add6;
 

-- 
If A equals success, then the formula is _A = _X + _Y + _Z.  _X is work.  
_Y
is play.  _Z is keep your mouth shut.
-- Albert Einstein



[patch] login_yubikey: delete keys

2016-03-31 Thread fritjof
Wipe out the key from "user.key".

--f.

Index: login_yubikey.c
===
RCS file: /cvs/src/libexec/login_yubikey/login_yubikey.c,v
retrieving revision 1.10
diff -u -p -u -r1.10 login_yubikey.c
--- login_yubikey.c 16 Jan 2015 06:39:50 -  1.10
+++ login_yubikey.c 31 Mar 2016 09:38:01 -
@@ -224,6 +224,8 @@ yubikey_login(const char *username, cons
yubikey_hex_decode(uid, hexuid, YUBIKEY_UID_SIZE);
yubikey_hex_decode(key, hexkey, YUBIKEY_KEY_SIZE);
 
+   explicit_bzero(hexkey, sizeof(hexkey));
+
/* 
 * Cycle through the key mapping table.
  * XXX brute force, unoptimized; a lookup table for valid mappings may
@@ -268,6 +270,8 @@ yubikey_login(const char *username, cons
}
break; /* only reached through the bottom of case 0 */
}
+
+   explicit_bzero(key, sizeof(key));
 
syslog(LOG_INFO, "user %s uid %s: %d matching keymaps (%d checked), "
"%d crc ok", username, hexuid, mapok, i, crcok);



Re: remove PPPOE_SERVER codepaths

2016-03-31 Thread Mike Belopuhov
On 31 March 2016 at 08:49, Jonathan Gray  wrote:
> if_spppsubr.c has:
>
> revision 1.29
> date: 2005/03/24 16:37:52;  author: claudio;  state: Exp;  lines: +15 -9;
> Unbreak tree, mono_time may no longer be used because of timecounters.
> Use getmicrouptime() instead. Found by grange@ and henning@.
> OK henning@
>
> It turns out PPPOE_SERVER paths have been broken since at least then
> so let's remove them:
>

Sure.



Re: move "privileged port" check out of in(6)_pcbaddrisavail()

2016-03-31 Thread Mike Belopuhov
On 31 March 2016 at 09:55, Martin Pieuchot  wrote:
> On 30/03/16(Wed) 22:44, Vincent Gross wrote:
>> Hello,
>>
>> This diff moves the "are we binding to a privileged port while not being 
>> root ?"
>> check from in(6)_pcbaddrisavail() to in_pcbbind().
>>
>> This way we have a cleaner separation between "is the resource available ?"
>> and "am I allowed to access the resource ?" (which may or may not get its own
>> function later).
>>
>> Also, it unbreaks naddy@'s iked setup (ikev2:sendmsg([::]:500) =>
>> in6_selectsrc() != in6p->inp_laddr6 => in6_pcbaddrisavail() => EPERM).
>>
>> Ok ?
>
> If you remove the KAME comment and your name, yes.
>

Yeah, these comments wouldn't be relevant after the change.



Re: [patch] Fix carp(4) with balancing ip / ip-stealth

2016-03-31 Thread Martin Pieuchot
On 30/03/16(Wed) 18:04, Florian Riehm wrote:
> On 03/01/16 23:03, Martin Pieuchot wrote:
> > On 18/02/16(Thu) 16:46, Florian Riehm wrote:
> >> On 02/16/16 11:23, Martin Pieuchot wrote:
> >>> On 12/02/16(Fri) 16:33, Florian Riehm wrote:
>  Hi Tech,
> 
>  I have noticed that CARP IP-Balancing is broken, so I am testing and
>  fixing it.
> 
>  The first problem came in with this commit:
>  http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/net/if_ethersubr.c.diff?r1=1.176=1.177
>  It enforces that outgoing packets use the src mac of the carp interface 
>  instead
>  the mac of its carpdev. That's a good idea for failover carp but it 
>  breaks
>  balancing ip(-stealth), because the same source MAC is seen by multiple
>  switchports now. That leads to continues MAC flapping at switches.
>  My attached patch fixes the problem. Thanks Martin's refactoring, we can
>  enforce the right MAC on a central place in carp_start() now.
> >>>
> >>> Can't you completely get rid of the else chunk then?  It looks like a
> >>> bad refactoring.
> >>
> >> I was also thinking about deleting the else chunk. At the end I decided to 
> >> keep
> >> it, because I wasn't sure about its purpose.
> >> My intention was:
> >> Fix the more or less obviously broken balancing case and don't change the
> >> behavior of other CARP modes.
> >> I will remove the else chunk and do some more testing to determine if it's
> >> really unnecessary.
> >>
> >>> I'm also wondering can't we use "sc_realmac" here?
> >>
> >> I have not know "sc_realmac" until now. As far as I understand the 
> >> intention of
> >> the sc_realmac flag is, to indicate that the user has overwritten the mac
> >> address of the carp interface with the mac of its parent. I have never had 
> >> a
> >> use case to do this. Without overwriting the mac by hand sc_realmac is 0 
> >> for
> >> carp with balancing ip/ip-stealth AND carp without balancing.
> >>
> >> I don't see how sc_realmac could help us by this problem?!
> >> Can you give me a hint please?
> > 
> > It was an open question, if that does not make any sense, then don't do
> > it ;)
> > 
>  A second problem was introduced two weeks ago. Since we always check the
>  destination MAC address of received unicast packets, not only when in
>  promiscuous mode, carp balancing ip is always broken, not only when in
>  promiscuous mode. The new check collides with "Clear mcast if received 
>  on a
>  carp IP balanced address." in carp_input().
> >>>
> >>> Could you describe the problem?  Which Destination MAC the packet
> >>> contains and why it doesn't match the interface?  I still don't grok why
> >>> a carp(4) interface in balancing mode cannot have the right MAC
> >>> configured...  Do you know?  But if what you want is the parent's MAC,
> >>> then I'd rather go with the symmetric of your other change:  modify
> >>> carp_input() to overwrite ``ether_dhost''.
> >>
> >> If using carp balancing, incoming packets contain the destination mac 
> >> address
> >> of the carp interface, let's say: 01:00:5e:00:01:01.
> >> The MCAST-Bit is set here!
> >>
> >> carp_input() removes the MCAST-Bit:
> >>/*
> >> * Clear mcast if received on a carp IP balanced address.
> >> */
> >>if (sc->sc_balancing == CARP_BAL_IP &&
> >>ETHER_IS_MULTICAST(eh->ether_dhost))
> >>*(eh->ether_dhost) &= ~0x01;
> >>
> >> Then we run into ether_input() and 
> >> memcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN)
> >> evaluates to false because 
> >> ac->ac_enaddr (01:00:5e:00:01:01) != eh->ether_dhost 
> >> (00:00:5e:00:01:01)
> > 
> > Taking a step back why do you want to clear the MULTICAST bit?
> > 
> > If it is just to differentiate between advertisement and normal traffic
> > couldn't we use a mbuf_tag(9) for CARP advertisement instead?  The tag
> > could be set in carp_input() and checked in carp_lsdrop() in the input
> > path.  It could also be used in the output path to get rid of the
> > horrible ``cur_vhe'' hack.
> > 
> Thanks for your comments. I took a look to the big picture:
> The idea behind carp balancing is that clients send all their packets to 
> layer 2
> multicast addresses. Switches flood those packets to all ports and every
> CARP-System receives them. Only one system processes the packet, the other
> systems drop it. The processing system handles the packet like a normal 
> unicast
> packet. The packet distribution described above is the only reason why the
> multicast bit is required. We don't want further multicast handling.
> 
> What happens if we don't remove the MCAST bit inside carp_input()?
> In ether_input() packets with MCAST bit get the mbuf flag M_MCAST. From now 
> the
> kernel assumes to deal with real multicast packets. Upper layers will do many
> things we don't want, e.g. tcp_input() and icmp_input_if() would drop the
> packet.
>  
> What can we do?
> 1.) Introduce a new mbuf flag to mark the 

Re: move "privileged port" check out of in(6)_pcbaddrisavail()

2016-03-31 Thread Martin Pieuchot
On 30/03/16(Wed) 22:44, Vincent Gross wrote:
> Hello,
> 
> This diff moves the "are we binding to a privileged port while not being root 
> ?"
> check from in(6)_pcbaddrisavail() to in_pcbbind().
> 
> This way we have a cleaner separation between "is the resource available ?"
> and "am I allowed to access the resource ?" (which may or may not get its own
> function later).
> 
> Also, it unbreaks naddy@'s iked setup (ikev2:sendmsg([::]:500) =>
> in6_selectsrc() != in6p->inp_laddr6 => in6_pcbaddrisavail() => EPERM).
> 
> Ok ?

If you remove the KAME comment and your name, yes.

> Index: sys/netinet/in_pcb.c
> ===
> RCS file: /cvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.198
> diff -u -p -r1.198 in_pcb.c
> --- sys/netinet/in_pcb.c  26 Mar 2016 21:56:04 -  1.198
> +++ sys/netinet/in_pcb.c  30 Mar 2016 20:33:00 -
> @@ -341,9 +341,14 @@ in_pcbbind(struct inpcb *inp, struct mbu
>   }
>   }
>  
> - if (lport == 0)
> + if (lport == 0) {
>   if ((error = in_pcbpickport(, wild, inp, p)))
>   return (error);
> + } else {
> + if (ntohs(lport) < IPPORT_RESERVED &&
> + (error = suser(p, 0)))
> + return (EACCES);
> + }
>   inp->inp_lport = lport;
>   in_pcbrehash(inp);
>   return (0);
> @@ -357,7 +362,6 @@ in_pcbaddrisavail(struct inpcb *inp, str
>   struct inpcbtable *table = inp->inp_table;
>   u_int16_t lport = sin->sin_port;
>   int reuseport = (so->so_options & SO_REUSEPORT);
> - int error;
>  
>   if (IN_MULTICAST(sin->sin_addr.s_addr)) {
>   /*
> @@ -398,9 +402,6 @@ in_pcbaddrisavail(struct inpcb *inp, str
>   struct inpcb *t;
>  
>   /* GROSS */
> - if (ntohs(lport) < IPPORT_RESERVED &&
> - (error = suser(p, 0)))
> - return (EACCES);
>   if (so->so_euid) {
>   t = in_pcblookup(table, _addr, 0,
>   >sin_addr, lport, INPLOOKUP_WILDCARD,
> Index: sys/netinet6/in6_pcb.c
> ===
> RCS file: /cvs/src/sys/netinet6/in6_pcb.c,v
> retrieving revision 1.90
> diff -u -p -r1.90 in6_pcb.c
> --- sys/netinet6/in6_pcb.c30 Mar 2016 13:02:22 -  1.90
> +++ sys/netinet6/in6_pcb.c30 Mar 2016 20:33:01 -
> @@ -158,7 +158,6 @@ in6_pcbaddrisavail(struct inpcb *inp, st
>   struct inpcbtable *table = inp->inp_table;
>   u_short lport = sin6->sin6_port;
>   int reuseport = (so->so_options & SO_REUSEPORT);
> - int error;
>  
>   wild |= INPLOOKUP_IPV6;
>   /* KAME hack: embed scopeid */
> @@ -226,8 +225,6 @@ in6_pcbaddrisavail(struct inpcb *inp, st
>* finding a process for a socket instead of using
>* curproc?  (Marked with BSD's {in,}famous XXX ?
>*/
> - if (ntohs(lport) < IPPORT_RESERVED && (error = suser(p, 0)))
> - return error;
>   if (so->so_euid) {
>   t = in_pcblookup(table,
>   (struct in_addr *)_addr, 0,
> 



Re: increase v_specbitmap size (allow more cloned devices)

2016-03-31 Thread Martin Natano
Thank you all for the input. Allocatig the bitmap via malloc() really
seems like the way to go, so we don't waste space for non-cloning
devices. See updated patch below.

Would it make sense to move the (rdev == VCHR && ...) condition to a
macro in ? I figured it's only used twice, so I inlined
it.

Comments/Ok?

natano


Index: sys/specdev.h
===
RCS file: /cvs/src/sys/sys/specdev.h,v
retrieving revision 1.34
diff -u -p -r1.34 specdev.h
--- sys/specdev.h   2 Nov 2013 00:16:31 -   1.34
+++ sys/specdev.h   31 Mar 2016 07:16:53 -
@@ -46,7 +46,7 @@ struct specinfo {
daddr_t si_lastr;
union {
struct vnode *ci_parent; /* pointer back to parent device */
-   u_int8_t ci_bitmap[8]; /* bitmap of devices cloned off us */
+   u_int8_t *ci_bitmap; /* bitmap of devices cloned off us */
} si_ci;
 };
 
@@ -71,6 +71,7 @@ struct cloneinfo {
  * This gives us 8 bits for encoding the real minor number.
  */
 #define CLONE_SHIFT8
+#define CLONE_MAP_SZ   128
 
 /*
  * Special device management
Index: kern/vfs_subr.c
===
RCS file: /cvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.240
diff -u -p -r1.240 vfs_subr.c
--- kern/vfs_subr.c 19 Mar 2016 12:04:15 -  1.240
+++ kern/vfs_subr.c 31 Mar 2016 07:16:53 -
@@ -555,7 +555,13 @@ loop:
nvp->v_specnext = *vpp;
nvp->v_specmountpoint = NULL;
nvp->v_speclockf = NULL;
-   memset(nvp->v_specbitmap, 0, sizeof(nvp->v_specbitmap));
+   nvp->v_specbitmap = NULL;
+   if (nvp_rdev == VCHR &&
+   cdevsw[major(nvp_rdev)].d_flags & D_CLONE &&
+   minor(nvp_rdev) >> CLONE_SHIFT == 0) {
+   nvp->v_specbitmap = malloc(CLONE_MAP_SZ, M_VNODE,
+   M_WAITOK | M_ZERO);
+   }
*vpp = nvp;
if (vp != NULLVP) {
nvp->v_flag |= VALIASED;
@@ -1092,6 +1098,11 @@ vgonel(struct vnode *vp, struct proc *p)
if (vq == NULL)
vx->v_flag &= ~VALIASED;
vp->v_flag &= ~VALIASED;
+   }
+   if (vp->v_rdev == VCHR &&
+   cdevsw[major(vp->v_rdev)].d_flags & D_CLONE &&
+   minor(vp->v_rdev) >> CLONE_SHIFT == 0) {
+   free(vp->v_specbitmap, M_VNODE, CLONE_MAP_SZ);
}
free(vp->v_specinfo, M_VNODE, sizeof(struct specinfo));
vp->v_specinfo = NULL;
Index: kern/spec_vnops.c
===
RCS file: /cvs/src/sys/kern/spec_vnops.c,v
retrieving revision 1.86
diff -u -p -r1.86 spec_vnops.c
--- kern/spec_vnops.c   19 Mar 2016 12:04:15 -  1.86
+++ kern/spec_vnops.c   31 Mar 2016 07:16:53 -
@@ -707,13 +707,14 @@ spec_open_clone(struct vop_open_args *ap
if (minor(vp->v_rdev) >= (1 << CLONE_SHIFT))
return (ENXIO);
 
-   for (i = 1; i < sizeof(vp->v_specbitmap) * NBBY; i++)
+   for (i = 1; i < CLONE_MAP_SZ * NBBY; i++) {
if (isclr(vp->v_specbitmap, i)) {
setbit(vp->v_specbitmap, i);
break;
}
+   }
 
-   if (i == sizeof(vp->v_specbitmap) * NBBY)
+   if (i == CLONE_MAP_SZ * NBBY)
return (EBUSY); /* too many open instances */
 
error = cdevvp(makedev(major(vp->v_rdev),



remove PPPOE_SERVER codepaths

2016-03-31 Thread Jonathan Gray
if_spppsubr.c has:

revision 1.29
date: 2005/03/24 16:37:52;  author: claudio;  state: Exp;  lines: +15 -9;
Unbreak tree, mono_time may no longer be used because of timecounters.
Use getmicrouptime() instead. Found by grange@ and henning@.
OK henning@

It turns out PPPOE_SERVER paths have been broken since at least then
so let's remove them:

../../../../net/if_pppoe.c: In function 'pppoe_send_pads':
../../../../net/if_pppoe.c:1461: error: 'mono_time' undeclared (first use in 
this function)
../../../../net/if_pppoe.c:1461: error: (Each undeclared identifier is reported 
only once
../../../../net/if_pppoe.c:1461: error: for each function it appears in.)

Index: share/man/man4/pppoe.4
===
RCS file: /cvs/src/share/man/man4/pppoe.4,v
retrieving revision 1.29
diff -u -p -r1.29 pppoe.4
--- share/man/man4/pppoe.4  12 Aug 2015 09:15:49 -  1.29
+++ share/man/man4/pppoe.4  31 Mar 2016 06:39:05 -
@@ -153,19 +153,6 @@ driver with this option set will send a 
 The peer will immediately disconnect
 the orphaned session and allow a new one to be established.
 .Pp
-If the kernel is compiled with option
-.Dv PPPOE_SERVER ,
-there are two modes of connection, controlled via the
-.Em link0
-switch.
-The default mode,
-.Em link0
-not being set, is client mode.
-The
-.Dq PPPoE server
-mode, selected by setting
-.Em link0 ,
-is to wait for incoming PPPoE sessions.
 .Sh MTU/MSS ISSUES
 Problems can arise on machines with private IPs connecting to the Internet
 via a machine running both
Index: sys/net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.234
diff -u -p -r1.234 if_ethersubr.c
--- sys/net/if_ethersubr.c  1 Mar 2016 01:48:14 -   1.234
+++ sys/net/if_ethersubr.c  31 Mar 2016 06:38:07 -
@@ -392,10 +392,8 @@ decapsulate:
 #if NPPPOE > 0 || defined(PIPEX)
case ETHERTYPE_PPPOEDISC:
case ETHERTYPE_PPPOE:
-#ifndef PPPOE_SERVER
if (m->m_flags & (M_MCAST | M_BCAST))
goto dropanyway;
-#endif
M_PREPEND(m, sizeof(*eh), M_DONTWAIT);
if (m == NULL)
return (1);
Index: sys/net/if_pppoe.c
===
RCS file: /cvs/src/sys/net/if_pppoe.c,v
retrieving revision 1.52
diff -u -p -r1.52 if_pppoe.c
--- sys/net/if_pppoe.c  5 Dec 2015 10:07:55 -   1.52
+++ sys/net/if_pppoe.c  31 Mar 2016 06:35:54 -
@@ -115,10 +115,6 @@ struct pppoetag {
 #definePPPOE_DISC_MAXPADI  4   /* retry PADI four times 
(quickly) */
 #definePPPOE_DISC_MAXPADR  2   /* retry PADR twice */
 
-#ifdef PPPOE_SERVER
-#defineIFF_PASSIVE IFF_LINK0   /* wait passively for 
connection */
-#endif
-
 struct pppoe_softc {
struct sppp sc_sppp;/* contains a struct ifnet as first 
element */
LIST_ENTRY(pppoe_softc) sc_list;
@@ -134,10 +130,6 @@ struct pppoe_softc {
size_t sc_ac_cookie_len;/* length of cookie data */
u_int8_t *sc_relay_sid; /* content of relay SID we must echo 
back */
size_t sc_relay_sid_len;/* length of relay SID data */
-#ifdef PPPOE_SERVER
-   u_int8_t *sc_hunique;   /* content of host unique we must echo 
back */
-   size_t sc_hunique_len;  /* length of host unique */
-#endif
u_int32_t sc_unique;/* our unique id */
struct timeout sc_timeout;  /* timeout while not in session state */
int sc_padi_retried;/* number of PADI retries already done 
*/
@@ -171,10 +163,6 @@ static void pppoe_timeout(void *);
 /* sending actual protocol control packets */
 static int pppoe_send_padi(struct pppoe_softc *);
 static int pppoe_send_padr(struct pppoe_softc *);
-#ifdef PPPOE_SERVER
-static int pppoe_send_pado(struct pppoe_softc *);
-static int pppoe_send_pads(struct pppoe_softc *);
-#endif
 static int pppoe_send_padt(unsigned int, u_int, const u_int8_t *);
 
 /* raw output */
@@ -384,10 +372,6 @@ static void pppoe_dispatch_disc_pkt(stru
u_int8_t *ac_cookie;
u_int8_t *relay_sid;
u_int8_t code;
-#ifdef PPPOE_SERVER
-   u_int8_t *hunique;
-   size_t hunique_len;
-#endif
 
err_msg = NULL;
devname = "pppoe";
@@ -406,10 +390,6 @@ static void pppoe_dispatch_disc_pkt(stru
relay_sid = NULL;
relay_sid_len = 0;
max_payload = NULL;
-#ifdef PPPOE_SERVER
-   hunique = NULL;
-   hunique_len = 0;
-#endif
 
session = 0;
if (m->m_pkthdr.len - off <= PPPOE_HEADERLEN) {
@@ -478,10 +458,6 @@ static void pppoe_dispatch_disc_pkt(stru
err_msg = "TAG HUNIQUE ERROR";
break;
}
-#ifdef PPPOE_SERVER
-   hunique = mtod(n, caddr_t) +