Re: pf "route-to least-state" in an anchor doesn't work

2020-06-03 Thread YASUOKA Masahiko
Hello,

On Wed, 3 Jun 2020 23:30:56 +0200
Alexandr Nedvedicky  wrote:
> I'm OK with your change.

Thank you for your review and comment.

> However I would like to ask you to do yet another test.  I wonder if things
> will eventually work on unfixed PF if rules will be constructed as follows:
> 
> pfctl -a test -t LB -T add 10.0.0.11@pair102
> 
> echo 'pass in on rdomain 102 quick proto tcp to 10.0.0.101 port 8080 \
> keep state ( sloppy ) route-to  \
> least-states sticky-address' |pfctl -a test -f -
> 
> echo 'anchor test' | pfctl -f -
> 
> pfctl -e
> 
> I suspect the bug you've found and fixed happens when pfctl loads rules
> from pf.conf. I think the steps above will take a different route
> through the code, which avoids pfr_ina_define() (a.k.a. transactions).

I've tested it before the diff and after.  Both tests were OK.

  # pfctl -sr -a test   
 
  pass in quick on rdomain 102 inet proto tcp from any to 10.0.0.101 port = 
8080 flags S/SA keep state (sloppy) route-to  least-states sticky-address
  # pfctl -a test -tLB -Tshow
 10.0.0.11@pair102
  # 

  $ doas route -T 101 exec telnet 10.0.0.101 8080
  Trying 10.0.0.101...
  Connected to 10.0.0.101.
  Escape character is '^]'.
  ^]
  
  telnet> close
  Connection closed.
  $ 

> I don't have a test system readily available and I'm just curious
> if anything changes or not. Thanks for finding that for me.
> 
> As I've said I think your change should go in.
> 
> OK sashan

Thanks,



[patch] vmd(8) event mutexes and ns8250 ratelimiter change

2020-06-03 Thread Dave Voutila
tech@,

The following patch is a possible step towards improved thread safety
to vmd(8). (I'll be the first to say it's not perfect.)

The first major change: mutexes around most event addition/deletion
while using a defined event_base.

In vm.c and in the emulated devices, which is predominantly the
multithreaded code in vmd(8), it adds mutexes to try to help protect
the state of the TAILQ and timeheap inside the event_base from
corruption due to race conditions. This can make it easier to break
devices out into their own threads downline (of which I have a patch
for as well).

The other major change: It changes the way the ns8250 uart performs
rate limiting. The ratelimit callback is now used regardless of a
client's request for TXRDY interrupt reporting and the ratelimit
callback locks the device before manipulating registers. 

I found this ns8250 change resolves reported lockups and stuttering
caused due to what I believe are race conditions between the vcpu
thread calling vcpu_process_com_data (which does the writing to the fd)
and the event loop thread. This problem was often seen when spamming
the RETURN key via the serial console.

These changes can't fully protect against event/timer deletes being
performed by the vcpu exit handling threads. From my observations, this
was happening a lot in the i8523_reset function resulting in sudden
guest death in 9front and some Linux guests.


-Dave Voutila

[1]: https://marc.info/?l=openbsd-tech=159028442625596=2


Index: control.c
===
RCS file: /cvs/src/usr.sbin/vmd/control.c,v
retrieving revision 1.30
diff -u -p -u -p -r1.30 control.c
--- control.c   4 Dec 2018 08:15:09 -   1.30
+++ control.c   3 Jun 2020 19:25:12 -
@@ -39,6 +39,8 @@
 
 #defineCONTROL_BACKLOG 5
 
+extern struct event_base *global_evbase;
+
 struct ctl_connlist ctl_conns;
 
 void
@@ -198,8 +200,10 @@ control_listen(struct control_sock *cs)
 
event_set(>cs_ev, cs->cs_fd, EV_READ,
control_accept, cs);
+   event_base_set(global_evbase, >cs_ev);
event_add(>cs_ev, NULL);
evtimer_set(>cs_evt, control_accept, cs);
+   event_base_set(global_evbase, >cs_evt);
 
return (0);
 }
@@ -256,6 +260,7 @@ control_accept(int listenfd, short event
c->iev.data = cs;
event_set(>iev.ev, c->iev.ibuf.fd, c->iev.events,
c->iev.handler, c->iev.data);
+   event_base_set(global_evbase, >iev.ev);
event_add(>iev.ev, NULL);
 
TAILQ_INSERT_TAIL(_conns, c, entry);
Index: i8253.c
===
RCS file: /cvs/src/usr.sbin/vmd/i8253.c,v
retrieving revision 1.31
diff -u -p -u -p -r1.31 i8253.c
--- i8253.c 30 Nov 2019 00:51:29 -  1.31
+++ i8253.c 3 Jun 2020 19:25:12 -
@@ -30,6 +30,7 @@
 
 #include "i8253.h"
 #include "proc.h"
+#include "vmd.h"
 #include "vmm.h"
 #include "atomicio.h"
 
@@ -43,6 +44,9 @@ extern char *__progname;
  */
 struct i8253_channel i8253_channel[3];
 
+extern struct event_base *global_evbase;
+extern pthread_mutex_t global_evmutex;
+
 /*
  * i8253_init
  *
@@ -75,8 +79,13 @@ i8253_init(uint32_t vm_id)
i8253_channel[2].state = 0;
 
evtimer_set(_channel[0].timer, i8253_fire, _channel[0]);
+   event_base_set(global_evbase, _channel[0].timer);
+
evtimer_set(_channel[1].timer, i8253_fire, _channel[1]);
+   event_base_set(global_evbase, _channel[1].timer);
+
evtimer_set(_channel[2].timer, i8253_fire, _channel[2]);
+   event_base_set(global_evbase, _channel[2].timer);
 }
 
 /*
@@ -311,14 +320,18 @@ i8253_reset(uint8_t chn)
 {
struct timeval tv;
 
-   evtimer_del(_channel[chn].timer);
+   mutex_lock(_evmutex);
+   if (evtimer_pending(_channel[chn].timer, NULL))
+   evtimer_del(_channel[chn].timer);
timerclear();
 
i8253_channel[chn].in_use = 1;
i8253_channel[chn].state = 0;
tv.tv_usec = (i8253_channel[chn].start * NS_PER_TICK) / 1000;
clock_gettime(CLOCK_MONOTONIC, _channel[chn].ts);
+
evtimer_add(_channel[chn].timer, );
+   mutex_unlock(_evmutex);
 }
 
 /*
@@ -344,7 +357,9 @@ i8253_fire(int fd, short type, void *arg
if (ctr->mode != TIMER_INTTC) {
timerclear();
tv.tv_usec = (ctr->start * NS_PER_TICK) / 1000;
+   mutex_lock(_evmutex);
evtimer_add(>timer, );
+   mutex_unlock(_evmutex);
} else
ctr->state = 1;
 }
@@ -377,6 +392,8 @@ i8253_restore(int fd, uint32_t vm_id)
i8253_channel[i].vm_id = vm_id;
evtimer_set(_channel[i].timer, i8253_fire,
_channel[i]);
+   event_base_set(global_evbase, _channel[i].timer);
+
i8253_reset(i);
}
return (0);
@@ -386,8 +403,10 @@ void
 i8253_stop()
 {
int i;
+   mutex_lock(_evmutex);
for (i = 

Re: pf "route-to least-state" in an anchor doesn't work

2020-06-03 Thread Alexandr Nedvedicky
Hello Yasuoka,

I'm OK with your change.

However I would like to ask you to do yet another test.  I wonder if things
will eventually work on unfixed PF if rules will be constructed as follows:

pfctl -a test -t LB -T add 10.0.0.11@pair102

echo 'pass in on rdomain 102 quick proto tcp to 10.0.0.101 port 8080 \
keep state ( sloppy ) route-to  \
least-states sticky-address' |pfctl -a test -f -

echo 'anchor test' | pfctl -f -

pfctl -e

I suspect the bug you've found and fixed happens when pfctl loads rules
from pf.conf. I think the steps above will take a different route
through the code, which avoids pfr_ina_define() (a.k.a. transactions).

I don't have a test system readily available and I'm just curious
if anything changes or not. Thanks for finding that for me.

As I've said I think your change should go in.

OK sashan



Re: ospf6d: change the way interfaces are handled

2020-06-03 Thread Sebastian Benoit
Remi Locherer(remi.loche...@relo.ch) on 2020.06.03 15:36:17 +0200:
> On Sat, May 30, 2020 at 04:37:43PM +0200, Denis Fondras wrote:
> > This diff updates how ospf6d(8) handles interfaces.
> > It is now in line with what ospfd(8) does.
> > 
> > Last step before enabling reload.
> > 
> > Tested against Mikrotik and Zebra implementations.
> > 
> > Warning: it changes the default behaviour. No prefix is announced if no
> > "redistribute" statement is present in config file. Is this a showstopper ?
> 
> The diff reads good and works. I mostly agree with it.
> 
> But we should not change the behaviour. That prefixes configured on an
> interface need a redistribute statement is counter intuitive. The "passive"
> statement would be useless.

There is also a behaviour difference: the "interface { passive }" way only
announces the prefix on active/master interfaces.

I think it needs to stay.

Great work btw!

/Benno

> 
> > 
> > Index: hello.c
> > ===
> > RCS file: /cvs/src/usr.sbin/ospf6d/hello.c,v
> > retrieving revision 1.22
> > diff -u -p -r1.22 hello.c
> > --- hello.c 3 Jan 2020 17:25:48 -   1.22
> > +++ hello.c 30 May 2020 14:19:09 -
> > @@ -175,12 +175,16 @@ recv_hello(struct iface *iface, struct i
> > nbr->priority = LSA_24_GETHI(ntohl(hello.opts));
> > /* XXX neighbor address shouldn't be stored on virtual links */
> > nbr->addr = *src;
> > +   ospfe_imsg_compose_rde(IMSG_NEIGHBOR_ADDR, nbr->peerid, 0,
> > +   src, sizeof(struct in6_addr));
> > }
> >  
> > if (!IN6_ARE_ADDR_EQUAL(>addr, src)) {
> > log_warnx("%s: neighbor ID %s changed its address to %s",
> > __func__, inet_ntoa(nbr->id), log_in6addr(src));
> > nbr->addr = *src;
> > +   ospfe_imsg_compose_rde(IMSG_NEIGHBOR_ADDR, nbr->peerid, 0,
> > +   src, sizeof(struct in6_addr));
> > }
> >  
> > nbr->options = opts;
> > Index: interface.c
> > ===
> > RCS file: /cvs/src/usr.sbin/ospf6d/interface.c,v
> > retrieving revision 1.29
> > diff -u -p -r1.29 interface.c
> > --- interface.c 27 May 2020 09:03:56 -  1.29
> > +++ interface.c 30 May 2020 14:19:09 -
> > @@ -72,8 +72,6 @@ struct {
> >  static int vlink_cnt = 0;
> >  #endif
> >  
> > -TAILQ_HEAD(, iface)iflist;
> > -
> >  const char * const if_event_names[] = {
> > "NOTHING",
> > "UP",
> > @@ -145,10 +143,6 @@ if_fsm(struct iface *iface, enum iface_e
> > area_track(iface->area);
> > orig_rtr_lsa(iface->area);
> > orig_link_lsa(iface);
> > -
> > -   /* state change inform RDE */
> > -   ospfe_imsg_compose_rde(IMSG_IFINFO, iface->self->peerid, 0,
> > -   >state, sizeof(iface->state));
> > }
> >  
> > if (old_state & (IF_STA_MULTI | IF_STA_POINTTOPOINT) &&
> > @@ -166,41 +160,8 @@ if_fsm(struct iface *iface, enum iface_e
> > return (ret);
> >  }
> >  
> > -int
> > -if_init(void)
> > -{
> > -   TAILQ_INIT();
> > -
> > -   return (fetchifs(0));
> > -}
> > -
> > -/* XXX using a linked list should be OK for now */
> >  struct iface *
> > -if_find(unsigned int ifindex)
> > -{
> > -   struct iface*iface;
> > -
> > -   TAILQ_FOREACH(iface, , list) {
> > -   if (ifindex == iface->ifindex)
> > -   return (iface);
> > -   }
> > -   return (NULL);
> > -}
> > -
> > -struct iface *
> > -if_findname(char *name)
> > -{
> > -   struct iface*iface;
> > -
> > -   TAILQ_FOREACH(iface, , list) {
> > -   if (!strcmp(name, iface->name))
> > -   return (iface);
> > -   }
> > -   return (NULL);
> > -}
> > -
> > -struct iface *
> > -if_new(u_short ifindex, char *ifname)
> > +if_new(struct kif *kif, struct kif_addr *ka)
> >  {
> > struct iface*iface;
> >  
> > @@ -210,7 +171,6 @@ if_new(u_short ifindex, char *ifname)
> > iface->state = IF_STA_DOWN;
> >  
> > LIST_INIT(>nbr_list);
> > -   TAILQ_INIT(>ifa_list);
> > TAILQ_INIT(>ls_ack_list);
> > RB_INIT(>lsa_tree);
> >  
> > @@ -225,34 +185,36 @@ if_new(u_short ifindex, char *ifname)
> > return (iface);
> > }
> >  #endif
> > -   strlcpy(iface->name, ifname, sizeof(iface->name));
> > -   iface->ifindex = ifindex;
> > -
> > -   TAILQ_INSERT_TAIL(, iface, list);
> > -
> > -   return (iface);
> > -}
> >  
> > -void
> > -if_update(struct iface *iface, int mtu, int flags, u_int8_t type,
> > -u_int8_t state, u_int64_t rate, u_int32_t rdomain)
> > -{
> > -   iface->mtu = mtu;
> > -   iface->flags = flags;
> > -   iface->if_type = type;
> > -   iface->linkstate = state;
> > -   iface->baudrate = rate;
> > -   iface->rdomain = rdomain;
> > +   strlcpy(iface->name, kif->ifname, sizeof(iface->name));
> >  
> > -   /* set type */
> > -   if (flags & IFF_POINTOPOINT)
> > +   /* get type */
> > +   if (kif->flags & 

Re: top: Fill last character in process line

2020-06-03 Thread Klemens Nanni
On Wed, Jun 03, 2020 at 05:33:24PM +0100, Nicholas Marriott wrote:
> Actually I've got them the wrong way round here, but others have already
> explained them anyway :-).
Yup, which is why I will simply drop the diff:  way too much hassle for
single column of output, let alone potential breakages on some terminals.

Thanks everyone for chiming in.



dhclient new commit are good but....

2020-06-03 Thread sven falempin
Dear readers,

  since 5.8 i ve been carrying around patches to manage :
* crazy server sending hostname like "crazy ISP name with space" ( in
one case the ignore or supersede failed to workaround the problem ),
it  is a bit hard to test, and it looks like some improvement was made
to crash fatal on all invalid network information,
* and a bridging case, which is more realistic :

Assuming the EGRESS is Bridged to a vether, a pair or something to
serve the same LAN to a VM or something else, the first dhclient will eat
the paquet in the BPF filter because MAC are ignored.

MAC are ignored for some obscure historic case where the dhcp server responded
with broadcast or something.

To see the problem , assuming em0 is egress like :

# ifconfig  em0
em0: 
flags=808b43
mtu 1500
lladdr 00:ff:18:0b:4a:ee
index 1 priority 0 llprio 3
groups: egress
media: Ethernet autoselect (100baseTX full-duplex,rxpause,txpause)
status: active
inet 172.16.1.4 netmask 0xff00 broadcast 172.16.1.255

Bridge it to something

ifconfig  vether0 create
ifconfig  vether0 rdomain 1
ifconfig  bridge0 create
ifconfig  bridge0 add em0
ifconfig  bridge0 add vether0
#safety net
echo <<< EOF >> /etc/dhclient
interface "vether1" {
send host-name "bridged-sub-host-1";
send dhcp-lease-time 3600;
ignore domain-name-servers,routers;
require subnet-mask,domain-name-servers;
}
#testing
ifconfig  bridge0 up
route -T 1 exec dhclient vether0

No lease ! because dhclient on em0 is actually filtering them at bpf level
A year ago the hw_addr was kept inside the daemon so i am not sure how
to apply my bpf filter.

To not break something the path add a -m option to dhclient that
enable mac bpf filter awareness,

In configure_bpf_sock, I added a  ` uint8_t *  ether_addr_octet` param
that is not null
and setup in case -m is passed to fill in a slightly different filter.

/* Set up the bpf filter program structure. */
p.bf_len = dhcp_bpf_mac_filter_len;
p.bf_insns = dhcp_bpf_mac_filter;
dhcp_bpf_mac_filter[8].k = LOCAL_PORT;
memcpy(, ether_addr_octet, sizeof(bits16));
dhcp_bpf_mac_filter[10].k = ntohs(bits16);
memcpy(, ether_addr_octet + 2, sizeof(bits));
dhcp_bpf_mac_filter[12].k = ntohl(bits);

with the filter :

struct bpf_insn dhcp_bpf_mac_filter[] = {
/* Make sure this is an IP packet. */
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 8),

/* Make sure it's a UDP packet. */
BPF_STMT(BPF_LD + BPF_B + BPF_ABS, 23),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 6),

/* Make sure this isn't a fragment. */
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 20),
BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, 0x1fff, 8, 0),

/* Get the IP header length. */
BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, 14),

/* Make sure it's to the right port. */
BPF_STMT(BPF_LD + BPF_H + BPF_IND, 16),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 67, 0, 1),  /* patch */

/* check bootp.hw.addr 2 bytes */
BPF_STMT(BPF_LD + BPF_H + BPF_IND, 50),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x, 0, 3),  /* patch */
/* check bootp.hw.addr 4 bytes */
BPF_STMT(BPF_LD + BPF_W + BPF_IND, 52),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x, 0, 1),  /* patch */

/* If we passed all the tests, ask for the whole packet. */
BPF_STMT(BPF_RET+BPF_K, (unsigned int)-1),

/* Otherwise, drop it. */
BPF_STMT(BPF_RET+BPF_K, 0),
};

int dhcp_bpf_filter_len = sizeof(dhcp_bpf_filter) / sizeof(struct bpf_insn);
int dhcp_bpf_mac_filter_len = sizeof(dhcp_bpf_mac_filter) /
sizeof(struct bpf_insn);

I would gladly test an officially made diff because this is becoming
hard to maintain,
and there should be use cases out there.

The only workaround I know is to put egress in a vether behind the bridges,
certainly something that could break anyway.

Thanks for reading up to there !

-- 
--
-
Knowing is not enough; we must apply. Willing is not enough; we must do



Re: pppx(4): rework to be clese to pseudo-interfaces

2020-06-03 Thread Vitaliy Makkoveev
ping?



Re: top: Fill last character in process line

2020-06-03 Thread Nicholas Marriott
Actually I've got them the wrong way round here, but others have already
explained them anyway :-).



On Wed, Jun 03, 2020 at 05:04:43PM +0100, Nicholas Marriott wrote:
> xenl (xm) only matters for the last line - you can't write into the very
> bottom right position without causing the terminal to scroll. No xenl is
> a pain and most applications solve it by just leaving that position
> always empty. The only terminal still about without xenl that I am aware
> of is the NetBSD console.
> 
> am only matters if you care where the cursor is after you write to the
> last character - that is, if you want to rely on it moving to the next
> line automatically. You can still write at the rightmost cursor
> position, just don't rely on the cursor moving to the next line or the
> terminal scrolling when you do. I expect top doesn't need to worry about
> this.
> 
> You could probably just use the full width except for the last line and
> trim the last line by 1 if tgetflag("xm") is true.
> 
> Or doesn't ncurses take care of all this anyway? Maybe it doesn't...
> 
> 
> On Wed, Jun 03, 2020 at 02:49:48PM +0200, Klemens Nanni wrote:
> > On Wed, Jun 03, 2020 at 12:45:35PM +0100, Stuart Henderson wrote:
> > > It should check terminal capabilities for this, see termcap(5).
> > > If 'am' (auto-margin) is set then it shouldn't write to the final column.
> > > If 'xn' is set then it's OK in some circumstances (it's probably easier to
> > > skip writing to the final column if this is set too).
> > Thanks mark and Stuart, I did not know about auto-margin (or autoWrap as
> > xterm(1) seems to call it).
> > 
> > What I understand is that writing to the screen's last terminal should
> > be avoided in terminal without the "am" capability, presumably because
> > it would cause a line wrap - is that correct?
> > 
> > Preliminary testing however indicates to me that at least xterm(1)
> > behaves the same in top's interactive screen with my patch, regardless
> > of the auto-margin capablility.
> > 
> > According to termcap(5) I did the following to disable "am", with
> > tput(1) I verify that it gets indeed disabled:
> > 
> > $  echo $TERM ; tput am ; echo $?
> > xterm
> > 0
> > $ TERM=vt100-nam ; tput am ; echo $?
> > 1
> > 
> > But in both cases, starting ./obj/top in the very same terminal/shell
> > behaves the same, that is to say the last column is written properly and
> > I see no line wrap or any change of behaviour.
> > 



Re: top: Fill last character in process line

2020-06-03 Thread Nicholas Marriott
xenl (xm) only matters for the last line - you can't write into the very
bottom right position without causing the terminal to scroll. No xenl is
a pain and most applications solve it by just leaving that position
always empty. The only terminal still about without xenl that I am aware
of is the NetBSD console.

am only matters if you care where the cursor is after you write to the
last character - that is, if you want to rely on it moving to the next
line automatically. You can still write at the rightmost cursor
position, just don't rely on the cursor moving to the next line or the
terminal scrolling when you do. I expect top doesn't need to worry about
this.

You could probably just use the full width except for the last line and
trim the last line by 1 if tgetflag("xm") is true.

Or doesn't ncurses take care of all this anyway? Maybe it doesn't...


On Wed, Jun 03, 2020 at 02:49:48PM +0200, Klemens Nanni wrote:
> On Wed, Jun 03, 2020 at 12:45:35PM +0100, Stuart Henderson wrote:
> > It should check terminal capabilities for this, see termcap(5).
> > If 'am' (auto-margin) is set then it shouldn't write to the final column.
> > If 'xn' is set then it's OK in some circumstances (it's probably easier to
> > skip writing to the final column if this is set too).
> Thanks mark and Stuart, I did not know about auto-margin (or autoWrap as
> xterm(1) seems to call it).
> 
> What I understand is that writing to the screen's last terminal should
> be avoided in terminal without the "am" capability, presumably because
> it would cause a line wrap - is that correct?
> 
> Preliminary testing however indicates to me that at least xterm(1)
> behaves the same in top's interactive screen with my patch, regardless
> of the auto-margin capablility.
> 
> According to termcap(5) I did the following to disable "am", with
> tput(1) I verify that it gets indeed disabled:
> 
>   $  echo $TERM ; tput am ; echo $?
>   xterm
>   0
>   $ TERM=vt100-nam ; tput am ; echo $?
>   1
> 
> But in both cases, starting ./obj/top in the very same terminal/shell
> behaves the same, that is to say the last column is written properly and
> I see no line wrap or any change of behaviour.
> 



Re: top: Fill last character in process line

2020-06-03 Thread Theo de Raadt
Stuart Henderson  wrote:

> On 2020/06/03 14:49, Klemens Nanni wrote:
> > On Wed, Jun 03, 2020 at 12:45:35PM +0100, Stuart Henderson wrote:
> > > It should check terminal capabilities for this, see termcap(5).
> > > If 'am' (auto-margin) is set then it shouldn't write to the final column.
> > > If 'xn' is set then it's OK in some circumstances (it's probably easier to
> > > skip writing to the final column if this is set too).
> > Thanks mark and Stuart, I did not know about auto-margin (or autoWrap as
> > xterm(1) seems to call it).
> > 
> > What I understand is that writing to the screen's last terminal should
> > be avoided in terminal without the "am" capability, presumably because
> > it would cause a line wrap - is that correct?
> > 
> > Preliminary testing however indicates to me that at least xterm(1)
> > behaves the same in top's interactive screen with my patch, regardless
> > of the auto-margin capablility.
> > 
> > According to termcap(5) I did the following to disable "am", with
> > tput(1) I verify that it gets indeed disabled:
> > 
> > $  echo $TERM ; tput am ; echo $?
> > xterm
> > 0
> > $ TERM=vt100-nam ; tput am ; echo $?
> > 1
> > 
> > But in both cases, starting ./obj/top in the very same terminal/shell
> > behaves the same, that is to say the last column is written properly and
> > I see no line wrap or any change of behaviour.
> > 
> 
> termcap/TERM isn't an instruction to the terminal to behave in a
> certain way, it describes characteristics of the actual terminal (or
> terminal emulator). Most of the common X-based emulators don't have the
> same characteristic. Even when they have something that looks similar
> e.g. xterm in "autowraparound" mode, it isn't quite the same thing as
> automargin as it only wraps when you've printed a char in the right-hand
> column and then print an additional char.
> 
> It's quite possible you will need to find certain types of older real
> hardware terminal to see the behaviour that requires 'am' handling.
> 
> (You *can* still print to the RH column of most lines with an auto-margin
> terminal, as long as you don't send an extra linefeed. However you can't
> write to the RH column of the bottom line of the screen without shifting
> chars into place. The easy way out is to just not print to RH column on
> am terminals).

That is a good description of the problem.

It seems like rather than having specialized case code for the
condition, top generalizes the problem by entirely avoiding printing in
those positions.  And you want to delete it, which will break top on
all those (rare) displays.



Re: userland clock_gettime proof of concept

2020-06-03 Thread Paul Irofti

On 2020-05-31 20:46, Mark Kettenis wrote:

Forget about all that for a moment.  Here is an alternative suggestion:

On sparc64 we need to support both tick_timecounter and
sys_tick_timecounter.  So we need some sort of clockid value to
distnguish between those two.  I already suggested to use the tc_user
field of the timecounter for that.  0 means that a timecounter is not
usable in userland, a (small) positive integer means a specific
timecounter type.  The code in libc will need to know whether a
particular timecounter type can be supported.  My proposal would be to
implement a function*on all architecture*  that takes the clockid as
an argument and returns a pointer to the function that implements
support for that timecounter.  On architectures without support, ir
when called with a clockid that isn't supported, that function would
simply return NULL.


I am sorry, but the more I try to implement this in a sane way, the more 
obvious it is that it is not possible. I would rather have a define 
sausage than something like this.


I will try to think of something else that avoids the defines, but I do 
not think that your proposal is a valid solution.




Re: ospf6d: change the way interfaces are handled

2020-06-03 Thread Remi Locherer
On Sat, May 30, 2020 at 04:37:43PM +0200, Denis Fondras wrote:
> This diff updates how ospf6d(8) handles interfaces.
> It is now in line with what ospfd(8) does.
> 
> Last step before enabling reload.
> 
> Tested against Mikrotik and Zebra implementations.
> 
> Warning: it changes the default behaviour. No prefix is announced if no
> "redistribute" statement is present in config file. Is this a showstopper ?

The diff reads good and works. I mostly agree with it.

But we should not change the behaviour. That prefixes configured on an
interface need a redistribute statement is counter intuitive. The "passive"
statement would be useless.

> 
> Index: hello.c
> ===
> RCS file: /cvs/src/usr.sbin/ospf6d/hello.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 hello.c
> --- hello.c   3 Jan 2020 17:25:48 -   1.22
> +++ hello.c   30 May 2020 14:19:09 -
> @@ -175,12 +175,16 @@ recv_hello(struct iface *iface, struct i
>   nbr->priority = LSA_24_GETHI(ntohl(hello.opts));
>   /* XXX neighbor address shouldn't be stored on virtual links */
>   nbr->addr = *src;
> + ospfe_imsg_compose_rde(IMSG_NEIGHBOR_ADDR, nbr->peerid, 0,
> + src, sizeof(struct in6_addr));
>   }
>  
>   if (!IN6_ARE_ADDR_EQUAL(>addr, src)) {
>   log_warnx("%s: neighbor ID %s changed its address to %s",
>   __func__, inet_ntoa(nbr->id), log_in6addr(src));
>   nbr->addr = *src;
> + ospfe_imsg_compose_rde(IMSG_NEIGHBOR_ADDR, nbr->peerid, 0,
> + src, sizeof(struct in6_addr));
>   }
>  
>   nbr->options = opts;
> Index: interface.c
> ===
> RCS file: /cvs/src/usr.sbin/ospf6d/interface.c,v
> retrieving revision 1.29
> diff -u -p -r1.29 interface.c
> --- interface.c   27 May 2020 09:03:56 -  1.29
> +++ interface.c   30 May 2020 14:19:09 -
> @@ -72,8 +72,6 @@ struct {
>  static int vlink_cnt = 0;
>  #endif
>  
> -TAILQ_HEAD(, iface)  iflist;
> -
>  const char * const if_event_names[] = {
>   "NOTHING",
>   "UP",
> @@ -145,10 +143,6 @@ if_fsm(struct iface *iface, enum iface_e
>   area_track(iface->area);
>   orig_rtr_lsa(iface->area);
>   orig_link_lsa(iface);
> -
> - /* state change inform RDE */
> - ospfe_imsg_compose_rde(IMSG_IFINFO, iface->self->peerid, 0,
> - >state, sizeof(iface->state));
>   }
>  
>   if (old_state & (IF_STA_MULTI | IF_STA_POINTTOPOINT) &&
> @@ -166,41 +160,8 @@ if_fsm(struct iface *iface, enum iface_e
>   return (ret);
>  }
>  
> -int
> -if_init(void)
> -{
> - TAILQ_INIT();
> -
> - return (fetchifs(0));
> -}
> -
> -/* XXX using a linked list should be OK for now */
>  struct iface *
> -if_find(unsigned int ifindex)
> -{
> - struct iface*iface;
> -
> - TAILQ_FOREACH(iface, , list) {
> - if (ifindex == iface->ifindex)
> - return (iface);
> - }
> - return (NULL);
> -}
> -
> -struct iface *
> -if_findname(char *name)
> -{
> - struct iface*iface;
> -
> - TAILQ_FOREACH(iface, , list) {
> - if (!strcmp(name, iface->name))
> - return (iface);
> - }
> - return (NULL);
> -}
> -
> -struct iface *
> -if_new(u_short ifindex, char *ifname)
> +if_new(struct kif *kif, struct kif_addr *ka)
>  {
>   struct iface*iface;
>  
> @@ -210,7 +171,6 @@ if_new(u_short ifindex, char *ifname)
>   iface->state = IF_STA_DOWN;
>  
>   LIST_INIT(>nbr_list);
> - TAILQ_INIT(>ifa_list);
>   TAILQ_INIT(>ls_ack_list);
>   RB_INIT(>lsa_tree);
>  
> @@ -225,34 +185,36 @@ if_new(u_short ifindex, char *ifname)
>   return (iface);
>   }
>  #endif
> - strlcpy(iface->name, ifname, sizeof(iface->name));
> - iface->ifindex = ifindex;
> -
> - TAILQ_INSERT_TAIL(, iface, list);
> -
> - return (iface);
> -}
>  
> -void
> -if_update(struct iface *iface, int mtu, int flags, u_int8_t type,
> -u_int8_t state, u_int64_t rate, u_int32_t rdomain)
> -{
> - iface->mtu = mtu;
> - iface->flags = flags;
> - iface->if_type = type;
> - iface->linkstate = state;
> - iface->baudrate = rate;
> - iface->rdomain = rdomain;
> + strlcpy(iface->name, kif->ifname, sizeof(iface->name));
>  
> - /* set type */
> - if (flags & IFF_POINTOPOINT)
> + /* get type */
> + if (kif->flags & IFF_POINTOPOINT)
>   iface->type = IF_TYPE_POINTOPOINT;
> - if (flags & IFF_BROADCAST && flags & IFF_MULTICAST)
> + if (kif->flags & IFF_BROADCAST && kif->flags & IFF_MULTICAST)
>   iface->type = IF_TYPE_BROADCAST;
> - if (flags & IFF_LOOPBACK) {
> + if (kif->flags & IFF_LOOPBACK) {
>   iface->type = IF_TYPE_POINTOPOINT;
> - iface->cflags |= 

Re: top: Fill last character in process line

2020-06-03 Thread Steffen Nurpmeso
Klemens Nanni wrote in
<20200603124948.irvdnxrxa5g75pmi@eru>:
 |On Wed, Jun 03, 2020 at 12:45:35PM +0100, Stuart Henderson wrote:
 |> It should check terminal capabilities for this, see termcap(5).
 |> If 'am' (auto-margin) is set then it shouldn't write to the final column.
 |> If 'xn' is set then it's OK in some circumstances (it's probably \
 |> easier to
 |> skip writing to the final column if this is set too).
 |Thanks mark and Stuart, I did not know about auto-margin (or autoWrap as
 |xterm(1) seems to call it).
 |
 |What I understand is that writing to the screen's last terminal should
 |be avoided in terminal without the "am" capability, presumably because
 |it would cause a line wrap - is that correct?
 |
 |Preliminary testing however indicates to me that at least xterm(1)
 |behaves the same in top's interactive screen with my patch, regardless
 |of the auto-margin capablility.
 |
 |According to termcap(5) I did the following to disable "am", with
 |tput(1) I verify that it gets indeed disabled:
 |
 | $  echo $TERM ; tput am ; echo $?
 | xterm
 | 0
 | $ TERM=vt100-nam ; tput am ; echo $?
 | 1
 |
 |But in both cases, starting ./obj/top in the very same terminal/shell
 |behaves the same, that is to say the last column is written properly and
 |I see no line wrap or any change of behaviour.

I do not think that you en- or disable "am", it indicates whether
the terminal has the capability or not.  I have

  n_TERMCAP_QUERY_am, /* am/am, BOOL | auto_right_margin */
  n_TERMCAP_QUERY_sam,/* sam/YE, BOOL | semi_auto_right_margin */
  n_TERMCAP_QUERY_xenl,   /* xenl/xn, BOOL | eat_newline_glitch */

...^^terminfo/termcap^^ names | terminfo variable
eat_newline_glitch ensures that \n written after margin does not
cause a line wrap.

  /* TODO We do not handle !n_TERMCAP_QUERY_sam in this software! */
  if(!n_termcap_query(n_TERMCAP_QUERY_am, ) ||
n_termcap_query(n_TERMCAP_QUERY_xenl, ))
 n_psonce |= n_PSO_TERMCAP_FULLWIDTH;

...

  /* Then search for right boundary.  Dependent upon n_PSO_FULLWIDTH (termcap
   * am/xn) We leave the rightmost column empty because some terminals
   * [cw]ould wrap the line if we write into that, or not.
   * TODO We do not deal with !n_TERMCAP_QUERY_sam */

 --End of <20200603124948.irvdnxrxa5g75pmi@eru>

--steffen
|
|Der Kragenbaer,The moon bear,
|der holt sich munter   he cheerfully and one by one
|einen nach dem anderen runter  wa.ks himself off
|(By Robert Gernhardt)



Re: top: Fill last character in process line

2020-06-03 Thread Stuart Henderson
On 2020/06/03 14:49, Klemens Nanni wrote:
> On Wed, Jun 03, 2020 at 12:45:35PM +0100, Stuart Henderson wrote:
> > It should check terminal capabilities for this, see termcap(5).
> > If 'am' (auto-margin) is set then it shouldn't write to the final column.
> > If 'xn' is set then it's OK in some circumstances (it's probably easier to
> > skip writing to the final column if this is set too).
> Thanks mark and Stuart, I did not know about auto-margin (or autoWrap as
> xterm(1) seems to call it).
> 
> What I understand is that writing to the screen's last terminal should
> be avoided in terminal without the "am" capability, presumably because
> it would cause a line wrap - is that correct?
> 
> Preliminary testing however indicates to me that at least xterm(1)
> behaves the same in top's interactive screen with my patch, regardless
> of the auto-margin capablility.
> 
> According to termcap(5) I did the following to disable "am", with
> tput(1) I verify that it gets indeed disabled:
> 
>   $  echo $TERM ; tput am ; echo $?
>   xterm
>   0
>   $ TERM=vt100-nam ; tput am ; echo $?
>   1
> 
> But in both cases, starting ./obj/top in the very same terminal/shell
> behaves the same, that is to say the last column is written properly and
> I see no line wrap or any change of behaviour.
> 

termcap/TERM isn't an instruction to the terminal to behave in a
certain way, it describes characteristics of the actual terminal (or
terminal emulator). Most of the common X-based emulators don't have the
same characteristic. Even when they have something that looks similar
e.g. xterm in "autowraparound" mode, it isn't quite the same thing as
automargin as it only wraps when you've printed a char in the right-hand
column and then print an additional char.

It's quite possible you will need to find certain types of older real
hardware terminal to see the behaviour that requires 'am' handling.

(You *can* still print to the RH column of most lines with an auto-margin
terminal, as long as you don't send an extra linefeed. However you can't
write to the RH column of the bottom line of the screen without shifting
chars into place. The easy way out is to just not print to RH column on
am terminals).



Re: top: Fill last character in process line

2020-06-03 Thread Klemens Nanni
On Wed, Jun 03, 2020 at 12:45:35PM +0100, Stuart Henderson wrote:
> It should check terminal capabilities for this, see termcap(5).
> If 'am' (auto-margin) is set then it shouldn't write to the final column.
> If 'xn' is set then it's OK in some circumstances (it's probably easier to
> skip writing to the final column if this is set too).
Thanks mark and Stuart, I did not know about auto-margin (or autoWrap as
xterm(1) seems to call it).

What I understand is that writing to the screen's last terminal should
be avoided in terminal without the "am" capability, presumably because
it would cause a line wrap - is that correct?

Preliminary testing however indicates to me that at least xterm(1)
behaves the same in top's interactive screen with my patch, regardless
of the auto-margin capablility.

According to termcap(5) I did the following to disable "am", with
tput(1) I verify that it gets indeed disabled:

$  echo $TERM ; tput am ; echo $?
xterm
0
$ TERM=vt100-nam ; tput am ; echo $?
1

But in both cases, starting ./obj/top in the very same terminal/shell
behaves the same, that is to say the last column is written properly and
I see no line wrap or any change of behaviour.



Re: Error reporting in ikev2_ike_sa_alive (was: Improve error reporting in pfkey_sa_last_used)

2020-06-03 Thread matthew j weaver
On Sun, May 31, 2020, at 17:27, Tobias Heider wrote:
> I don't think this is a good idea
> With your diff the log gets spammed with 'Undefined error: 0' for child SAs
> that have never been used.
> Also log_warn seems a bit too much as those errors are rarely serious.

  Thank you for having a look, Tobias. I appreciate the time & the feedback.

  Embarassed I didn't test the unused child SA case.

  I'll come back around if I think of a more elegant way to expose debug for 
the case here that's interesting.

  matthew weaver



PCI interrupt routing on UEFI machine?

2020-06-03 Thread Jaromír Doleček
Hello,

I'm working on porting xmm7360 device driver from Linux to OpenBSD,
the Linux code:
https://github.com/xmm7360/xmm7360-pci/

Most of device driver communication with the device is already
working, but currently I can't get the interrupt routine to actually
trigger after successful pci_intr_establish(). Tried to use INTx and
MSI interrupt, that doesn't make difference.

It may or may not be due to some missing platform initialization,
since the machine is booted in UEFI mode. Unfortunately the machine is
not able to boot USB disks in legacy mode, so I can't test how it
behaves in that case.

I've already checked how the device is initialised, and there is no
difference between what Linux and OpenBSD PCI code does, as far as I
can tell. Another data point is that some other PCI devices work on
the machine (NVMe disk and em(4) network controller).

I however see INTx IRQ allocated on Linux is 155 while it's 255 (i.e.
not assigned) on OpenBSD. I also see that MSI Address+Data is
different (Linux uses a unique MSI Address, while OpenBSD uses same
Address but different Data).

Can you give some advice on what to check and confirm whether it's
indeed hw setup problem? Are there any known problems with PCI
interrupt routing on UEFI machines under OpenBSD, or possibly some
floating patches to try?

Dmesg:
https://drive.google.com/file/d/1CPDvLdR94Oj2oaKDtIEAAGFEBqubMv3W/view?usp=sharing

OpenBSD lspci -vvx
https://drive.google.com/file/d/1HpsBaeXVAfewN5kjaLiXnTotef_UNHAn/view?usp=sharing

Linux lspci -vvx
https://drive.google.com/file/d/1ybcnyw13o-Tz2_vKzOsfJCN-sPeyYEQ7/view?usp=sharing

Jaromir



Re: top: Fill last character in process line

2020-06-03 Thread Stuart Henderson
On 2020/06/03 12:46, Klemens Nanni wrote:
> 
> i_process() prints process lines from the global buffer thisline[MAX_COLS]
> which is filed by format_next_process() using snprintf(3), i.e. it is
> guaranteed to be NUL terminated.
> 
> display_width is always set to screen_width and capped to MAX_COLS-1
> in display_resize(), so NUL terminating thisline[] at index
> display_width is not only redundant but also cuts off the last visibile
> character for each process line.
> 
> Remove this redundancy to make top use the entire line and not have
> process names (or arguments) end one char too early in interactive use.
> 
> Feedback? OK?

It should check terminal capabilities for this, see termcap(5).
If 'am' (auto-margin) is set then it shouldn't write to the final column.
If 'xn' is set then it's OK in some circumstances (it's probably easier to
skip writing to the final column if this is set too).



Re: top: Fill last character in process line

2020-06-03 Thread Klemens Nanni
On Wed, Jun 03, 2020 at 01:11:15PM +0200, Mark Kettenis wrote:
> Does that write into the last column of a 80-character wide screen?
> That causes a linewrap on some terminals isn't it?  And that would be
> undesirable.
Yes, it does;  xterm and st from the x11/st package have no problem
however to display this, 'tmux new top' also works fine in both.



Re: drop addtrust from cert.pem?

2020-06-03 Thread Stuart Henderson
On 2020/06/02 21:38, Bob Beck wrote:
> On Mon, Jun 01, 2020 at 06:04:17PM +0100, Stuart Henderson wrote:
> > OK to drop the expired AddTrust cert from cert.pem?
> 
> yes, thanks.
> 
> > 
> > I checked against the firefox set, there are no new/removed certs that
> > work with libressl there. There are now two with GENERALIZEDTIME notAfter
> > dates from before 2050 that don't work though (I only remember seeing one
> > of those when I last looked).. but that is a separate issue.
> > 
> > /C=EE/O=AS Sertifitseerimiskeskus/CN=EE Certification Centre Root 
> > CA/emailAddress=p...@sk.ee
> > /C=PL/O=Unizeto Technologies S.A./OU=Certum Certification 
> > Authority/CN=Certum Trusted Network CA 2
> 
> I suspect these can safely be dropped too.

I haven't included them anyway because they don't work with libressl.
btw Mozilla knew about this at least when they added the Certum one,

https://bugzilla.mozilla.org/show_bug.cgi?id=999378#c30

"mozilla::pkix does not enforce this rule about when Generalized Time
may be used. If we decide to add code to enforce this rule, it will be
for certificates created after a certain date (definitely later than
2013)"

Not sure what the Certum one is used for, the p...@sk.ee one is kinda
important, it's used for https://en.wikipedia.org/wiki/Estonian_identity_card



Re: top: Fill last character in process line

2020-06-03 Thread Mark Kettenis
> Date: Wed, 3 Jun 2020 12:46:48 +0200
> From: Klemens Nanni 
> 
> i_process() prints process lines from the global buffer thisline[MAX_COLS]
> which is filed by format_next_process() using snprintf(3), i.e. it is
> guaranteed to be NUL terminated.
> 
> display_width is always set to screen_width and capped to MAX_COLS-1
> in display_resize(), so NUL terminating thisline[] at index
> display_width is not only redundant but also cuts off the last visibile
> character for each process line.
> 
> Remove this redundancy to make top use the entire line and not have
> process names (or arguments) end one char too early in interactive use.
> 
> Feedback? OK?

Does that write into the last column of a 80-character wide screen?
That causes a linewrap on some terminals isn't it?  And that would be
undesirable.

> diff --git a/usr.bin/top/display.c b/usr.bin/top/display.c
> index ff02198638e..3be47e6c5dd 100644
> --- a/usr.bin/top/display.c
> +++ b/usr.bin/top/display.c
> @@ -540,9 +540,6 @@ i_process(int line, char *thisline, int hl)
>   /* make sure we are on the correct line */
>   move(y_procs + line, 0);
>  
> - /* truncate the line to conform to our current screen width */
> - thisline[display_width] = '\0';
> -
>   /* write the line out */
>   if (hl && smart_terminal)
>   standoutp();
> 
> 



top: Fill last character in process line

2020-06-03 Thread Klemens Nanni

i_process() prints process lines from the global buffer thisline[MAX_COLS]
which is filed by format_next_process() using snprintf(3), i.e. it is
guaranteed to be NUL terminated.

display_width is always set to screen_width and capped to MAX_COLS-1
in display_resize(), so NUL terminating thisline[] at index
display_width is not only redundant but also cuts off the last visibile
character for each process line.

Remove this redundancy to make top use the entire line and not have
process names (or arguments) end one char too early in interactive use.

Feedback? OK?

diff --git a/usr.bin/top/display.c b/usr.bin/top/display.c
index ff02198638e..3be47e6c5dd 100644
--- a/usr.bin/top/display.c
+++ b/usr.bin/top/display.c
@@ -540,9 +540,6 @@ i_process(int line, char *thisline, int hl)
/* make sure we are on the correct line */
move(y_procs + line, 0);
 
-   /* truncate the line to conform to our current screen width */
-   thisline[display_width] = '\0';
-
/* write the line out */
if (hl && smart_terminal)
standoutp();



Re: NFS kqueue handlers & poll(2)/select(2) compatibility

2020-06-03 Thread Martin Pieuchot
On 01/06/20(Mon) 15:41, Visa Hankala wrote:
> On Sun, May 31, 2020 at 10:48:52AM +0200, Martin Pieuchot wrote:
> > NFS poll(2)/select(2) and kqueue(2) behaviors are incoherent.  Diff
> > below uses the kernel-only NOTE_IMM hint to make the kqueue handlers
> > behave like the current poll handler: the poller is bypassed.
> > 
> > The new EVFILT_WRITE handler doesn't check for NOTE_IMM because it is
> > unlikely to introduce regression.
> > 
> > Is this a preferred approach?  Ok?
> 
> Yes, this does not undermine the functionality.

Thanks for the feedbacks.

> [...] 
> Ah, here it does make a difference that the userland can manipulate
> kn_sfflags. The flag should be immutable (or the need of unwatching
> managed inside nfs_kq.c somehow).

Would the approach below using the last bit masked by EV_SYSFLAGS would
be more appropriate?

I renamed the define to "OLDAPI" matching DragonFly's name because we're
now using it for more than immediate read behavior.

Index: isofs/cd9660/cd9660_vnops.c
===
RCS file: /cvs/src/sys/isofs/cd9660/cd9660_vnops.c,v
retrieving revision 1.83
diff -u -p -r1.83 cd9660_vnops.c
--- isofs/cd9660/cd9660_vnops.c 7 Apr 2020 13:27:51 -   1.83
+++ isofs/cd9660/cd9660_vnops.c 3 Jun 2020 09:14:15 -
@@ -1036,6 +1036,9 @@ filt_cd9660read(struct knote *kn, long h
return (1);
}
 
+   if (kn->kn_flags & EV_OLDAPI)
+   return (1);
+
return (kn->kn_data != 0);
 }
 
Index: miscfs/fuse/fuse_vnops.c
===
RCS file: /cvs/src/sys/miscfs/fuse/fuse_vnops.c,v
retrieving revision 1.59
diff -u -p -r1.59 fuse_vnops.c
--- miscfs/fuse/fuse_vnops.c7 Apr 2020 13:27:51 -   1.59
+++ miscfs/fuse/fuse_vnops.c3 Jun 2020 09:14:15 -
@@ -188,6 +188,9 @@ filt_fusefsread(struct knote *kn, long h
return (1);
}
 
+   if (kn->kn_flags & EV_OLDAPI)
+   return (1);
+
return (kn->kn_data != 0);
 }
 
Index: msdosfs/msdosfs_vnops.c
===
RCS file: /cvs/src/sys/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.132
diff -u -p -r1.132 msdosfs_vnops.c
--- msdosfs/msdosfs_vnops.c 7 Apr 2020 13:27:52 -   1.132
+++ msdosfs/msdosfs_vnops.c 3 Jun 2020 09:14:15 -
@@ -2013,6 +2013,10 @@ filt_msdosfsread(struct knote *kn, long 
kn->kn_fflags |= NOTE_EOF;
return (1);
}
+
+   if (kn->kn_flags & EV_OLDAPI)
+   return (1);
+
return (kn->kn_data != 0);
 }
 
Index: nfs/nfs_kq.c
===
RCS file: /cvs/src/sys/nfs/nfs_kq.c,v
retrieving revision 1.30
diff -u -p -r1.30 nfs_kq.c
--- nfs/nfs_kq.c7 Apr 2020 13:27:52 -   1.30
+++ nfs/nfs_kq.c3 Jun 2020 09:14:15 -
@@ -50,9 +50,12 @@
 #include 
 
 void   nfs_kqpoll(void *);
+intnfs_kqwatch(struct vnode *);
+void   nfs_kqunwatch(struct vnode *);
 
 void   filt_nfsdetach(struct knote *);
 intfilt_nfsread(struct knote *, long);
+intfilt_nfswrite(struct knote *, long);
 intfilt_nfsvnode(struct knote *, long);
 
 struct kevq {
@@ -182,11 +185,19 @@ void
 filt_nfsdetach(struct knote *kn)
 {
struct vnode *vp = (struct vnode *)kn->kn_hook;
-   struct kevq *ke;
 
klist_remove(>v_selectinfo.si_note, kn);
 
/* Remove the vnode from watch list */
+   if ((kn->kn_flags & EV_OLDAPI) == 0)
+   nfs_kqunwatch(vp);
+}
+
+void
+nfs_kqunwatch(struct vnode *vp)
+{
+   struct kevq *ke;
+
rw_enter_write(_lock);
SLIST_FOREACH(ke, , kev_link) {
if (ke->vp == vp) {
@@ -234,10 +245,30 @@ filt_nfsread(struct knote *kn, long hint
kn->kn_fflags |= NOTE_EOF;
return (1);
}
+
+   if (kn->kn_flags & EV_OLDAPI)
+   return (1);
+
 return (kn->kn_data != 0);
 }
 
 int
+filt_nfswrite(struct knote *kn, long hint)
+{
+   /*
+* filesystem is gone, so set the EOF flag and schedule
+* the knote for deletion.
+*/
+   if (hint == NOTE_REVOKE) {
+   kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+   return (1);
+   }
+
+   kn->kn_data = 0;
+   return (1);
+}
+
+int
 filt_nfsvnode(struct knote *kn, long hint)
 {
if (kn->kn_sfflags & hint)
@@ -256,6 +287,13 @@ static const struct filterops nfsread_fi
.f_event= filt_nfsread,
 };
 
+const struct filterops nfswrite_filtops = {
+   .f_flags= FILTEROP_ISFD,
+   .f_attach   = NULL,
+   .f_detach   = filt_nfsdetach,
+   .f_event= filt_nfswrite,
+};
+
 static const struct filterops nfsvnode_filtops = {
.f_flags= FILTEROP_ISFD,
.f_attach   = NULL,
@@ -269,10 +307,6 @@ nfs_kqfilter(void *v)
struct vop_kqfilter_args *ap = 

pf "route-to least-state" in an anchor doesn't work

2020-06-03 Thread YASUOKA Masahiko
Hi,

pf.conf:

  anchor {
pass in on rdomain 102 quick proto tcp to 10.0.0.101 port 8080 \
  keep state ( sloppy ) route-to  \
  least-states sticky-address
  }
  table  {
10.0.0.11@pair102
  }

this doesn't work.  All packets going to 10.0.0.101 are dropped with
'no-route'.  The problem doesn't happen if the pass rule is moved to
outside of the anchor or uses "round-robin" instead of "least-states".

In sys/net/pf_lb.c:
594 if (rpool->addr.type == PF_ADDR_TABLE) {
595 if (pfr_states_increase(rpool->addr.p.tbl,
596 naddr, af) == -1) {
597 if (pf_status.debug >= LOG_DEBUG) {
598 log(LOG_DEBUG,"pf: pf_map_addr: 
"
599 "selected address ");
600 pf_print_host(naddr, 0, af);
601 addlog(". Failed to increase 
count!\n");
602 }
603 return (1);
604 }

This chunk is to increase the counter for "least-state".  The packets
drops here because pfr_states_increase() returns -1.
pfr_states_increase() uses pfr_kentry_byaddr(), and
pfr_kentry_byaddr() uses pfr_lookup_addr() to lookup a kentry in the
table.

pfr_lookup_addr() never succeeded for above case, because it doesn't
care about using global (root) tables from rules in an anchor.  All
other functions which lookup a kentry from the table than
pfr_lookup_addr() seem to take care about that.

I thought that pfr_lookup_addr() is a local function used for ioctl to
create tables and manage its members.  So the keep it
untouched. Instead, the diff replaces the body of pfr_kentry_byaddr()
by the logic of pfr_match_addr().

* * *
Test

1. prepare network

  ifconfig pair101 rdomain 101 10.0.0.1/24
  ifconfig pair102 rdomain 102 10.0.0.10/24
  ifconfig pair102 alias 10.0.0.101/24
  ifconfig pair103 rdomain 103 10.0.0.11/24
  ifconfig pair104 rdomain 100 patch pair101 up
  ifconfig pair105 rdomain 100 patch pair102 up
  ifconfig pair106 rdomain 100 patch pair103 up
  ifconfig lo103 127.0.0.1/8
  ifconfig lo103 alias 10.0.0.101/24

  ifconfig bridge100 add pair104
  ifconfig bridge100 add pair105
  ifconfig bridge100 add pair106 up

2. setup pf.conf

  anchor {
pass in on rdomain 102 quick proto tcp to 10.0.0.101 port 8080 \
  keep state ( sloppy ) route-to  \
  least-states sticky-address
  }
  table  {
10.0.0.11@pair102
  }

3. start a daemon on 8080/tcp on #103

   doas route -T 103 exec nc -l 8080

4. try to connect to it from #101

   doas route -T 101 exec telnet 10.0.0.101 8080

   - test OK if the connection is established

5. teardown

  ifconfig pair106 destroy
  ifconfig pair105 destroy
  ifconfig pair104 destroy
  ifconfig pair103 destroy
  ifconfig pair102 destroy
  ifconfig pair101 destroy
  ifconfig bridge100 destroy

* * *

ok?

Fix pfr_kentry_byaddr() to be used for a rule in an anchor.  It
couldn't find an entry if its table is attached a table on the root.
This fixes the problem "route-to  least-states" doesn't work.
The problem is found by IIJ.

Index: sys/net/pf_table.c
===
RCS file: /cvs/src/sys/net/pf_table.c,v
retrieving revision 1.131
diff -u -p -r1.131 pf_table.c
--- sys/net/pf_table.c  8 Jul 2019 17:49:57 -   1.131
+++ sys/net/pf_table.c  3 Jun 2020 07:21:27 -
@@ -2085,11 +2085,28 @@ int
 pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
 {
struct pfr_kentry   *ke = NULL;
+   int  match;
+
+   ke = pfr_kentry_byaddr(kt, a, af, 0);
+
+   match = (ke && !(ke->pfrke_flags & PFRKE_FLAG_NOT));
+   if (match)
+   kt->pfrkt_match++;
+   else
+   kt->pfrkt_nomatch++;
+
+   return (match);
+}
+
+struct pfr_kentry *
+pfr_kentry_byaddr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
+int exact)
+{
+   struct pfr_kentry   *ke = NULL;
struct sockaddr_in   tmp4;
 #ifdef INET6
struct sockaddr_in6  tmp6;
 #endif /* INET6 */
-   int  match;
 
if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
kt = kt->pfrkt_root;
@@ -2116,12 +2133,10 @@ pfr_match_addr(struct pfr_ktable *kt, st
default:
unhandled_af(af);
}
-   match = (ke && !(ke->pfrke_flags & PFRKE_FLAG_NOT));
-   if (match)
-   kt->pfrkt_match++;
-   else
-   kt->pfrkt_nomatch++;
-   return (match);
+   if (exact && ke && KENTRY_NETWORK(ke))
+   ke = NULL;
+
+   return (ke);
 }
 
 void
@@ -2497,39 +2512,6 @@ pfr_states_decrease(struct pfr_ktable *k
"pfr_states_decrease: states-- when states <= 0");