Re: Change the way we handle interface/connected networks

2015-02-09 Thread Martin Pieuchot
On 10/02/15(Tue) 03:04, Claudio Jeker wrote:
 There is no need to not allow the same network to be configured more than
 once. Instead just rely on the multipath and priority handling of the
 routing table to select the right route.
 Additionally this removes cloned routes (arp/npd cache) when the interface
 goes down or when any of the multipath cloning routes is changed.
 
 With this it is possible to run 2 dhclients on wired and wireless with a
 bridged network. Active TCP sessions still fail when the cable is
 unplugged. To fix this more is needed.
 
 This changes a fundamental part of the network stack and therefore broad
 testing is needed to find all the hidden dragons.

I like this a lot.  We should also kill the IFA_ROUTE flag.  It was
only used to indicate which ifa was owning the cloning route of its
corresponding subnet.

One more comment:

 @@ -1655,6 +1675,9 @@ rt_if_track(struct ifnet *ifp)
   return;
  
   for (tid = 0; tid = rtbl_id_max; tid++) {
 + /* skip rtables that are not in the rdomain of the ifp */
 + if (rtable_l2(tid) != ifp-if_rdomain)
 + continue;

This chunk is not strictly related and should already go in.



USB Ethernet if_input(): axen(4), cdce(4), smsc(4)...

2015-03-18 Thread Martin Pieuchot
I'd love to hear from people using USB Ethernet adapters about the
if_input() conversion diff below.

Please make sure everything works as before, including tcpdump(8).

I also take oks on a per-driver basis :)
M

Index: if_aue.c
===
RCS file: /cvs/src/sys/dev/usb/if_aue.c,v
retrieving revision 1.97
diff -u -p -r1.97 if_aue.c
--- if_aue.c14 Mar 2015 03:38:49 -  1.97
+++ if_aue.c17 Mar 2015 21:45:27 -
@@ -1017,6 +1017,7 @@ aue_rxeof(struct usbd_xfer *xfer, void *
struct aue_softc*sc = c-aue_sc;
struct ifnet*ifp = GET_IFP(sc);
struct mbuf *m;
+   struct mbuf_listml = MBUF_LIST_INITIALIZER();
u_int32_t   total_len;
struct aue_rxpktr;
int s;
@@ -1067,32 +1068,15 @@ aue_rxeof(struct usbd_xfer *xfer, void *
total_len -= ETHER_CRC_LEN + 4;
m-m_pkthdr.len = m-m_len = total_len;
ifp-if_ipackets++;
+   ml_enqueue(ml, m);
 
-   m-m_pkthdr.rcvif = ifp;
-
-   s = splnet();
-
-   /* XXX ugly */
if (aue_newbuf(sc, c, NULL) == ENOBUFS) {
ifp-if_ierrors++;
-   goto done1;
+   goto done;
}
 
-#if NBPFILTER  0
-   /*
-* Handle BPF listeners. Let the BPF user see the packet, but
-* don't pass it up to the ether_input() layer unless it's
-* a broadcast packet, multicast packet, matches our ethernet
-* address or the interface is in promiscuous mode.
-*/
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   DPRINTFN(10,(%s: %s: deliver %d\n, sc-aue_dev.dv_xname,
-   __func__, m-m_len));
-   ether_input_mbuf(ifp, m);
- done1:
+   s = splnet();
+   if_input(ifp, ml);
splx(s);
 
  done:
Index: if_axen.c
===
RCS file: /cvs/src/sys/dev/usb/if_axen.c,v
retrieving revision 1.11
diff -u -p -r1.11 if_axen.c
--- if_axen.c   22 Jan 2015 10:23:47 -  1.11
+++ if_axen.c   17 Mar 2015 20:45:27 -
@@ -948,6 +948,7 @@ axen_rxeof(struct usbd_xfer *xfer, void 
struct axen_softc   *sc = c-axen_sc;
struct ifnet*ifp = GET_IFP(sc);
u_char  *buf = c-axen_buf;
+   struct mbuf_listml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
u_int32_t   total_len;
u_int32_t   rx_hdr, pkt_hdr;
@@ -1053,7 +1054,6 @@ axen_rxeof(struct usbd_xfer *xfer, void 
 
/* skip pseudo header (2byte) */
ifp-if_ipackets++;
-   m-m_pkthdr.rcvif = ifp;
m-m_pkthdr.len = m-m_len = pkt_len - 2;
 
 #ifdef AXEN_TOE
@@ -1078,14 +1078,7 @@ axen_rxeof(struct usbd_xfer *xfer, void 
 
memcpy(mtod(m, char *), buf + 2, pkt_len - 2);
 
-   /* push the packet up */
-   s = splnet();
-#if NBPFILTER  0
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-   ether_input_mbuf(ifp, m);
-   splx(s);
+   ml_enqueue(ml, m);
 
 nextpkt:
/*
@@ -1100,6 +1093,11 @@ nextpkt:
} while( pkt_count  0);
 
 done:
+   /* push the packet up */
+   s = splnet();
+   if_input(ifp, ml);
+   splx(s);
+
/* clear buffer for next transaction */
memset(c-axen_buf, 0, sc-axen_bufsz);
 
Index: if_cdce.c
===
RCS file: /cvs/src/sys/dev/usb/if_cdce.c,v
retrieving revision 1.63
diff -u -p -r1.63 if_cdce.c
--- if_cdce.c   14 Mar 2015 03:38:49 -  1.63
+++ if_cdce.c   17 Mar 2015 21:45:15 -
@@ -726,6 +726,7 @@ cdce_rxeof(struct usbd_xfer *xfer, void 
struct cdce_softc   *sc = c-cdce_sc;
struct ifnet*ifp = GET_IFP(sc);
struct mbuf *m;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
int  total_len = 0;
int  s;
 
@@ -767,25 +768,16 @@ cdce_rxeof(struct usbd_xfer *xfer, void 
}
 
ifp-if_ipackets++;
-
m-m_pkthdr.len = m-m_len = total_len;
-   m-m_pkthdr.rcvif = ifp;
-
-   s = splnet();
+   ml_enqueue(ml, m);
 
if (cdce_newbuf(sc, c, NULL) == ENOBUFS) {
ifp-if_ierrors++;
-   goto done1;
+   goto done;
}
 
-#if NBPFILTER  0
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   ether_input_mbuf(ifp, m);
-
-done1:
+   s = splnet();
+   if_input(ifp, ml);
splx(s);
 
 done:
Index: if_cdcef.c
===
RCS file: /cvs/src/sys/dev/usb/if_cdcef.c,v
retrieving revision 1.35

Re: ARP mbuf queues

2015-03-19 Thread Martin Pieuchot
On 19/03/15(Thu) 14:11, Martin Pieuchot wrote:
 When a host wants to send packets to a destination whose Ethernet address
 has not been resolved yet, it puts such packets on an mbuf queue.
 
 Right now this queue, linked to the corresponding ARP data structure, is
 hand rolled.  I wrote the diff below during s2k15 to make use of the
 mq_enqueue(9) API instead.
 
 I verified that the queue is correctly dropped when an infinite recursion
 in in_arpinput() is triggered.
 
 Comments, Ok?

Oops, kspillner@ pointed out that I forgot a chunk, here's the full
diff:

Index: netinet/if_ether.c
===
RCS file: /cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.148
diff -u -p -r1.148 if_ether.c
--- netinet/if_ether.c  14 Mar 2015 17:13:44 -  1.148
+++ netinet/if_ether.c  19 Mar 2015 13:04:28 -
@@ -69,7 +69,6 @@
 #endif
 
 #define SDL(s) ((struct sockaddr_dl *)s)
-#define SRP(s) ((struct sockaddr_inarp *)s)
 
 /*
  * ARP trailer negotiation.  Trailer protocol is not IP specific,
@@ -77,6 +76,15 @@
  */
 #define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL
 
+struct llinfo_arp {
+   LIST_ENTRY(llinfo_arp)   la_list;
+   struct rtentry  *la_rt; /* backpointer to rtentry */
+   long la_asked;  /* last time we QUERIED */
+   struct mbuf_queuela_mq; /* packet hold queue */
+};
+#define LA_HOLD_QUEUE 10
+#define LA_HOLD_TOTAL 100
+
 /* timer values */
 intarpt_prune = (5*60*1);  /* walk list every 5 minutes */
 intarpt_keep = (20*60);/* once resolved, good for 20 more minutes */
@@ -220,6 +228,7 @@ arp_rtrequest(int req, struct rtentry *r
 * add with a LL address.
 */
la = pool_get(arp_pool, PR_NOWAIT | PR_ZERO);
+   mq_init(la-la_mq, LA_HOLD_QUEUE, IPL_NONE);
rt-rt_llinfo = (caddr_t)la;
if (la == NULL) {
log(LOG_DEBUG, %s: malloc failed\n, __func__);
@@ -282,8 +291,7 @@ arp_rtrequest(int req, struct rtentry *r
LIST_REMOVE(la, la_list);
rt-rt_llinfo = 0;
rt-rt_flags = ~RTF_LLINFO;
-   while ((m = la-la_hold_head) != NULL) {
-   la-la_hold_head = la-la_hold_head-m_nextpkt;
+   while ((m = mq_dequeue(la-la_mq)) != NULL) {
la_hold_total--;
m_freem(m);
}
@@ -425,32 +433,14 @@ arpresolve(struct arpcom *ac, struct rte
 * response yet. Insert mbuf in hold queue if below limit
 * if above the limit free the queue without queuing the new packet.
 */
-   if (la_hold_total  MAX_HOLD_TOTAL  la_hold_total  nmbclust / 64) {
-   if (la-la_hold_count = MAX_HOLD_QUEUE) {
-   mh = la-la_hold_head;
-   la-la_hold_head = la-la_hold_head-m_nextpkt;
-   if (mh == la-la_hold_tail)
-   la-la_hold_tail = NULL;
-   la-la_hold_count--;
-   la_hold_total--;
-   m_freem(mh);
-   }
-   if (la-la_hold_tail == NULL)
-   la-la_hold_head = m;
-   else
-   la-la_hold_tail-m_nextpkt = m;
-   la-la_hold_tail = m;
-   la-la_hold_count++;
-   la_hold_total++;
+   if (la_hold_total  LA_HOLD_TOTAL  la_hold_total  nmbclust / 64) {
+   if (mq_enqueue(la-la_mq, m) == 0)
+   la_hold_total++;
} else {
-   while ((mh = la-la_hold_head) != NULL) {
-   la-la_hold_head =
-   la-la_hold_head-m_nextpkt;
+   while ((mh = mq_dequeue(la-la_mq)) != NULL) {
la_hold_total--;
m_freem(mh);
}
-   la-la_hold_tail = NULL;
-   la-la_hold_count = 0;
m_freem(m);
}
 
@@ -483,14 +473,10 @@ arpresolve(struct arpcom *ac, struct rte
rt-rt_flags |= RTF_REJECT;
rt-rt_expire += arpt_down;
la-la_asked = 0;
-   while ((mh = la-la_hold_head) != NULL) {
-   la-la_hold_head =
-   la-la_hold_head-m_nextpkt;
+   while ((mh = mq_dequeue(la-la_mq)) != NULL) {
la_hold_total--;
m_freem(mh);
}
-   la-la_hold_tail = NULL;
-   la-la_hold_count = 0;
}
}
}
@@ -570,13 +556,14 @@ in_arpinput(struct mbuf *m)
struct sockaddr_dl *sdl

Re: ARP mbuf queues

2015-03-21 Thread Martin Pieuchot
On 21/03/15(Sat) 17:48, David Gwynne wrote:
 
  On 19 Mar 2015, at 11:11 pm, Martin Pieuchot m...@openbsd.org wrote:
  
  When a host wants to send packets to a destination whose Ethernet address
  has not been resolved yet, it puts such packets on an mbuf queue.
  
  Right now this queue, linked to the corresponding ARP data structure, is
  hand rolled.  I wrote the diff below during s2k15 to make use of the
  mq_enqueue(9) API instead.
  
  I verified that the queue is correctly dropped when an infinite recursion
  in in_arpinput() is triggered.
  
  Comments, Ok?
 
 the global count is annoying to read, but i get it.
 
 why mbuf_queues instead of mbuf_lists? just to get the drop on mq_enqueue?

Yep...  That might be overkill since we do not really use the mutex.  Do
you prefer the version below using a mbuf_list?

As a bonus this diff only calls ml_init() if the pool allocation succeeds.

Index: netinet/if_ether.c
===
RCS file: /cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.148
diff -u -p -r1.148 if_ether.c
--- netinet/if_ether.c  14 Mar 2015 17:13:44 -  1.148
+++ netinet/if_ether.c  21 Mar 2015 13:59:55 -
@@ -69,7 +69,6 @@
 #endif
 
 #define SDL(s) ((struct sockaddr_dl *)s)
-#define SRP(s) ((struct sockaddr_inarp *)s)
 
 /*
  * ARP trailer negotiation.  Trailer protocol is not IP specific,
@@ -77,6 +76,15 @@
  */
 #define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL
 
+struct llinfo_arp {
+   LIST_ENTRY(llinfo_arp)   la_list;
+   struct rtentry  *la_rt; /* backpointer to rtentry */
+   long la_asked;  /* last time we QUERIED */
+   struct mbuf_list la_ml; /* packet hold queue */
+};
+#define LA_HOLD_QUEUE 10
+#define LA_HOLD_TOTAL 100
+
 /* timer values */
 intarpt_prune = (5*60*1);  /* walk list every 5 minutes */
 intarpt_keep = (20*60);/* once resolved, good for 20 more minutes */
@@ -227,6 +235,7 @@ arp_rtrequest(int req, struct rtentry *r
}
arp_inuse++;
arp_allocated++;
+   ml_init(la-la_ml);
la-la_rt = rt;
rt-rt_flags |= RTF_LLINFO;
LIST_INSERT_HEAD(llinfo_arp, la, la_list);
@@ -282,8 +291,7 @@ arp_rtrequest(int req, struct rtentry *r
LIST_REMOVE(la, la_list);
rt-rt_llinfo = 0;
rt-rt_flags = ~RTF_LLINFO;
-   while ((m = la-la_hold_head) != NULL) {
-   la-la_hold_head = la-la_hold_head-m_nextpkt;
+   while ((m = ml_dequeue(la-la_ml)) != NULL) {
la_hold_total--;
m_freem(m);
}
@@ -425,32 +433,19 @@ arpresolve(struct arpcom *ac, struct rte
 * response yet. Insert mbuf in hold queue if below limit
 * if above the limit free the queue without queuing the new packet.
 */
-   if (la_hold_total  MAX_HOLD_TOTAL  la_hold_total  nmbclust / 64) {
-   if (la-la_hold_count = MAX_HOLD_QUEUE) {
-   mh = la-la_hold_head;
-   la-la_hold_head = la-la_hold_head-m_nextpkt;
-   if (mh == la-la_hold_tail)
-   la-la_hold_tail = NULL;
-   la-la_hold_count--;
+   if (la_hold_total  LA_HOLD_TOTAL  la_hold_total  nmbclust / 64) {
+   if (ml_len(la-la_ml) = LA_HOLD_QUEUE) {
+   mh = ml_dequeue(la-la_ml);
la_hold_total--;
m_freem(mh);
}
-   if (la-la_hold_tail == NULL)
-   la-la_hold_head = m;
-   else
-   la-la_hold_tail-m_nextpkt = m;
-   la-la_hold_tail = m;
-   la-la_hold_count++;
+   ml_enqueue(la-la_ml, m);
la_hold_total++;
} else {
-   while ((mh = la-la_hold_head) != NULL) {
-   la-la_hold_head =
-   la-la_hold_head-m_nextpkt;
+   while ((mh = ml_dequeue(la-la_ml)) != NULL) {
la_hold_total--;
m_freem(mh);
}
-   la-la_hold_tail = NULL;
-   la-la_hold_count = 0;
m_freem(m);
}
 
@@ -483,14 +478,10 @@ arpresolve(struct arpcom *ac, struct rte
rt-rt_flags |= RTF_REJECT;
rt-rt_expire += arpt_down;
la-la_asked = 0;
-   while ((mh = la-la_hold_head) != NULL) {
-   la-la_hold_head =
-   la-la_hold_head-m_nextpkt;
+   while ((mh = ml_dequeue(la-la_ml)) != NULL) {
la_hold_total

ARP mbuf queues

2015-03-19 Thread Martin Pieuchot
When a host wants to send packets to a destination whose Ethernet address
has not been resolved yet, it puts such packets on an mbuf queue.

Right now this queue, linked to the corresponding ARP data structure, is
hand rolled.  I wrote the diff below during s2k15 to make use of the
mq_enqueue(9) API instead.

I verified that the queue is correctly dropped when an infinite recursion
in in_arpinput() is triggered.

Comments, Ok?

Index: netinet/if_ether.c
===
RCS file: /cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.148
diff -u -p -r1.148 if_ether.c
--- netinet/if_ether.c  14 Mar 2015 17:13:44 -  1.148
+++ netinet/if_ether.c  19 Mar 2015 13:04:28 -
@@ -69,7 +69,6 @@
 #endif
 
 #define SDL(s) ((struct sockaddr_dl *)s)
-#define SRP(s) ((struct sockaddr_inarp *)s)
 
 /*
  * ARP trailer negotiation.  Trailer protocol is not IP specific,
@@ -77,6 +76,15 @@
  */
 #define ETHERTYPE_IPTRAILERS ETHERTYPE_TRAIL
 
+struct llinfo_arp {
+   LIST_ENTRY(llinfo_arp)   la_list;
+   struct rtentry  *la_rt; /* backpointer to rtentry */
+   long la_asked;  /* last time we QUERIED */
+   struct mbuf_queuela_mq; /* packet hold queue */
+};
+#define LA_HOLD_QUEUE 10
+#define LA_HOLD_TOTAL 100
+
 /* timer values */
 intarpt_prune = (5*60*1);  /* walk list every 5 minutes */
 intarpt_keep = (20*60);/* once resolved, good for 20 more minutes */
@@ -220,6 +228,7 @@ arp_rtrequest(int req, struct rtentry *r
 * add with a LL address.
 */
la = pool_get(arp_pool, PR_NOWAIT | PR_ZERO);
+   mq_init(la-la_mq, LA_HOLD_QUEUE, IPL_NONE);
rt-rt_llinfo = (caddr_t)la;
if (la == NULL) {
log(LOG_DEBUG, %s: malloc failed\n, __func__);
@@ -282,8 +291,7 @@ arp_rtrequest(int req, struct rtentry *r
LIST_REMOVE(la, la_list);
rt-rt_llinfo = 0;
rt-rt_flags = ~RTF_LLINFO;
-   while ((m = la-la_hold_head) != NULL) {
-   la-la_hold_head = la-la_hold_head-m_nextpkt;
+   while ((m = mq_dequeue(la-la_mq)) != NULL) {
la_hold_total--;
m_freem(m);
}
@@ -425,32 +433,14 @@ arpresolve(struct arpcom *ac, struct rte
 * response yet. Insert mbuf in hold queue if below limit
 * if above the limit free the queue without queuing the new packet.
 */
-   if (la_hold_total  MAX_HOLD_TOTAL  la_hold_total  nmbclust / 64) {
-   if (la-la_hold_count = MAX_HOLD_QUEUE) {
-   mh = la-la_hold_head;
-   la-la_hold_head = la-la_hold_head-m_nextpkt;
-   if (mh == la-la_hold_tail)
-   la-la_hold_tail = NULL;
-   la-la_hold_count--;
-   la_hold_total--;
-   m_freem(mh);
-   }
-   if (la-la_hold_tail == NULL)
-   la-la_hold_head = m;
-   else
-   la-la_hold_tail-m_nextpkt = m;
-   la-la_hold_tail = m;
-   la-la_hold_count++;
-   la_hold_total++;
+   if (la_hold_total  LA_HOLD_TOTAL  la_hold_total  nmbclust / 64) {
+   if (mq_enqueue(la-la_mq, m) == 0)
+   la_hold_total++;
} else {
-   while ((mh = la-la_hold_head) != NULL) {
-   la-la_hold_head =
-   la-la_hold_head-m_nextpkt;
+   while ((mh = mq_dequeue(la-la_mq)) != NULL) {
la_hold_total--;
m_freem(mh);
}
-   la-la_hold_tail = NULL;
-   la-la_hold_count = 0;
m_freem(m);
}
 
@@ -483,14 +473,10 @@ arpresolve(struct arpcom *ac, struct rte
rt-rt_flags |= RTF_REJECT;
rt-rt_expire += arpt_down;
la-la_asked = 0;
-   while ((mh = la-la_hold_head) != NULL) {
-   la-la_hold_head =
-   la-la_hold_head-m_nextpkt;
+   while ((mh = mq_dequeue(la-la_mq)) != NULL) {
la_hold_total--;
m_freem(mh);
}
-   la-la_hold_tail = NULL;
-   la-la_hold_count = 0;
}
}
}
@@ -570,13 +556,14 @@ in_arpinput(struct mbuf *m)
struct sockaddr_dl *sdl;
struct sockaddr sa;
struct in_addr isaddr, itaddr, myaddr;
-   struct mbuf *mh, *mt;
+   struct mbuf *mh;

Re: Change the way we handle interface/connected networks

2015-03-17 Thread Martin Pieuchot
On 12/02/15(Thu) 12:35, Martin Pieuchot wrote:
 On 10/02/15(Tue) 03:04, Claudio Jeker wrote:
  There is no need to not allow the same network to be configured more than
  once. Instead just rely on the multipath and priority handling of the
  routing table to select the right route.
  Additionally this removes cloned routes (arp/npd cache) when the interface
  goes down or when any of the multipath cloning routes is changed.
  
  With this it is possible to run 2 dhclients on wired and wireless with a
  bridged network. Active TCP sessions still fail when the cable is
  unplugged. To fix this more is needed.
  
  This changes a fundamental part of the network stack and therefore broad
  testing is needed to find all the hidden dragons.
 
 Here's a version of the diff rebased on top of the recent changes.

I think it's time to get this in, then as a second step put in the
dhclient(8) bits.

Claudio you have my ok.

 Index: net/if_var.h
 ===
 RCS file: /cvs/src/sys/net/if_var.h,v
 retrieving revision 1.20
 diff -u -p -r1.20 if_var.h
 --- net/if_var.h  9 Feb 2015 03:09:57 -   1.20
 +++ net/if_var.h  12 Feb 2015 11:08:40 -
 @@ -392,6 +392,7 @@ do {  
 \
  /* default interface priorities */
  #define IF_WIRED_DEFAULT_PRIORITY0
  #define IF_WIRELESS_DEFAULT_PRIORITY 4
 +#define IF_CARP_DEFAULT_PRIORITY 15
  
  extern struct ifnet_head ifnet;
  extern struct ifnet *lo0ifp;
 Index: net/route.c
 ===
 RCS file: /cvs/src/sys/net/route.c,v
 retrieving revision 1.206
 diff -u -p -r1.206 route.c
 --- net/route.c   11 Feb 2015 23:34:43 -  1.206
 +++ net/route.c   12 Feb 2015 11:08:40 -
 @@ -554,6 +554,16 @@ rtdeletemsg(struct rtentry *rt, u_int ta
   return (error);
  }
  
 +static inline int
 +rtequal(struct rtentry *a, struct rtentry *b)
 +{
 + if (memcmp(rt_key(a), rt_key(b), rt_key(a)-sa_len) == 0 
 + memcmp(rt_mask(a), rt_mask(b), rt_mask(a)-sa_len) == 0)
 + return 1;
 + else
 + return 0;
 +}
 +
  int
  rtflushclone1(struct radix_node *rn, void *arg, u_int id)
  {
 @@ -561,7 +571,8 @@ rtflushclone1(struct radix_node *rn, voi
  
   rt = (struct rtentry *)rn;
   parent = (struct rtentry *)arg;
 - if ((rt-rt_flags  RTF_CLONED) != 0  rt-rt_parent == parent)
 + if ((rt-rt_flags  RTF_CLONED) != 0  (rt-rt_parent == parent ||
 + rtequal(rt-rt_parent, parent)))
   rtdeletemsg(rt, id);
   return 0;
  }
 @@ -1106,16 +1117,20 @@ rt_ifa_add(struct ifaddr *ifa, int flags
  {
   struct rtentry  *rt, *nrt = NULL;
   struct sockaddr_rtlabel  sa_rl;
 + struct sockaddr_dl   sa_dl = { sizeof(sa_dl), AF_LINK };
   struct rt_addrinfo   info;
   u_short  rtableid = ifa-ifa_ifp-if_rdomain;
 - u_int8_t prio = RTP_CONNECTED;
 + u_int8_t prio = ifa-ifa_ifp-if_priority + RTP_STATIC;
   int  error;
  
 + sa_dl.sdl_type = ifa-ifa_ifp-if_type;
 + sa_dl.sdl_index = ifa-ifa_ifp-if_index;
 +
   memset(info, 0, sizeof(info));
   info.rti_ifa = ifa;
 - info.rti_flags = flags;
 + info.rti_flags = flags | RTF_MPATH;
   info.rti_info[RTAX_DST] = dst;
 - info.rti_info[RTAX_GATEWAY] = ifa-ifa_addr;
 + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sa_dl;
   info.rti_info[RTAX_LABEL] =
   rtlabel_id2sa(ifa-ifa_ifp-if_rtlabelid, sa_rl);
  
 @@ -1161,8 +1176,9 @@ rt_ifa_del(struct ifaddr *ifa, int flags
   struct sockaddr *deldst;
   struct rt_addrinfo   info;
   struct sockaddr_rtlabel  sa_rl;
 + struct sockaddr_dl   sa_dl = { sizeof(sa_dl), AF_LINK };
   u_short  rtableid = ifa-ifa_ifp-if_rdomain;
 - u_int8_t prio = RTP_CONNECTED;
 + u_int8_t prio = ifa-ifa_ifp-if_priority + RTP_STATIC;
   int  error;
  
   if ((flags  RTF_HOST) == 0  ifa-ifa_netmask) {
 @@ -1187,10 +1203,14 @@ rt_ifa_del(struct ifaddr *ifa, int flags
   }
   }
  
 + sa_dl.sdl_type = ifa-ifa_ifp-if_type;
 + sa_dl.sdl_index = ifa-ifa_ifp-if_index;
 +
   memset(info, 0, sizeof(info));
   info.rti_ifa = ifa;
   info.rti_flags = flags;
   info.rti_info[RTAX_DST] = dst;
 + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sa_dl;
   info.rti_info[RTAX_LABEL] =
   rtlabel_id2sa(ifa-ifa_ifp-if_rtlabelid, sa_rl);
  
 @@ -1692,6 +1715,15 @@ rt_if_linkstate_change(struct radix_node
   }
   } else {
   if (rt-rt_flags  RTF_UP) {
 + /*
 +  * Remove cloned routes (mainly arp) to
 +  * down interfaces so we have

Re: A new batch of patches for ldpd(8)

2015-03-09 Thread Martin Pieuchot
Hi Renato,

On 06/03/15(Fri) 15:03, Renato Westphal wrote:
 Hi all,
 
 I've done a lot of work on ldpd(8) a long time ago but only now I
 found time to organize my patches and send them to review. The patches
 can be found here:
 https://github.com/rwestphal/openbsd-ldpd/commits/renato-2015
 
 The major changes introduced are:
 * VPLS signaling;
 * Configuration reload support (sighup);
 * MD5 authentication as per RFC 5036;
 * Major rework of the label mapping algorithms.
 
 Along with that there are a lot of bug fixes, code simplification and
 removal of dead code.
 
 Rafael Zalamena is working on the datapath implementation of the VPLS
 solution and will release his patches anytime soon.
 
 Suggestions, comments and feedback are welcome.

Can you send the diffs one by one to tech@ so that people can review
them and give you oks to commit?  Some of them are trivial and should
be quickly reviewed.

Please do not flood the list, don't send them all at once :)

Martin



Re: fix stuck cancelled ehci xfers (ehci_idone: ex=%p is done!)

2015-03-09 Thread Martin Pieuchot
On 26/02/15(Thu) 01:51, Martin Pieuchot wrote:
 On 22/02/15(Sun) 00:54, Martin Pieuchot wrote:
  On 20/02/15(Fri) 23:15, Stefan Sperling wrote:
   In the following configuration I can pretty easily trigger endless
   screenfulls of scrolling ehci_idone: ex=%p is done! messages,
   where %p is to a constant pointer value (same in each message).
   
   [ehci host]--[usb extension cable (hub 1)]--[usb keyboard (hub 
   2)]--[usb mouse]
   
   So there's a USB extension cable (hub 1) that I plug a keyboard into.
   The keyboard in turn has a built-in hub that has a mouse plugged into it.
   The problem happens almost every time I pull the keyboard out of hub 1.
   
   I've also seen it happen without any intervention on my part (In fact
   I was in a different city and couldn't use the box until I got home
   several hours later and hit the reset switch... that's no fun)
   
   So if a transfer is cancelled (e.g. as a result of pulling the plug), 
   then:
   
   - usbd_abort_pipe wants to abort a related transfer
   - ehci_abort_xfer schedules and waits for ehci_softintr, expecting
 the softintr routine to deal with the cancelled transfer:
   
 /*
  * Step 3: Make sure the soft interrupt routine has run. This
  * should remove any completed items off the queue.
  * The hardware has no reference to completed items (TDs).
  * It's safe to remove them at any time.
  */
 s = splusb();
 sc-sc_softwake = 1;
 usb_schedsoftintr(sc-sc_bus);
 tsleep(sc-sc_softwake, PZERO, ehciab, 0);
   
   - ehci_softintr gets scheduled
   - ehci_softintr loops over xfers on the sc_intrhead TAILQ and
 invokes ehci_check_intr on each
   - ehci_check_intr eventually ends up calling ehci_idone
   - ehci_idone does nothing for cancelled transfers... ?!?
   
 if (xfer-status == USBD_CANCELLED ||
 xfer-status == USBD_TIMEOUT) {
 DPRINTF((ehci_idone: aborted xfer=%p\n, xfer));
 return;
 }
   
   - something else happens
   
   - ehci_abort_xfer awakes from tsleep and sets ex-isdone, since it
 expects the softinterrupt routine to have dealt with the xfer
   
   - something else happens
   
   - the host controller sends an INTERR interrupt
   - ehci_intr1 schedules ehci_softintr
   - ehci_softintr loops over xfers on the sc_intrhead TAILQ and
 invokes ehci_check_intr on each
   - the cancelled xfer is still in the intrhead TAILQ and ends up in 
   ehci_idone
   - ehci_idone looks for the isdone flag which is now set, then it
 complains and does nothing
   
   - the host controller sends an INTERR interrupt
   ... same story again, we get an endless loop
   
   This diff breaks the chain of events and fixes the endless loop for me.
   I can't reproduce the problem anymore by pulling the keyboard out.
   I don't quite understand how this prevents the flood of INTERR interrupts
   but it seems to work.
   
   I assume there are nasty tentacles in USB land which I'm unfamiliar with.
   Is there any reason this could be a bad idea?
  
  Stefan that's a really good analysis.  I think the diff might not be
  completely correct though.
  
  So basically you're removing the transfer from the active list.  That's
  generally done after the USB callback has been executed, in your case in
  ehci_device_intr_done().
  
  For interrupt transfers (pipe-repeat is 1) the transfer is kept on the
  list while the descriptors are freed/reallocated.  That should be safe
  since we should be reusing the same ones.
  
  So I don't know if we are missing a spl protection or if there's an xfer
  leak but I'm afraid that with your diff usb_transfer_complete() might
  not be called for the failing xfer.
  
  That's easy to check, look if the ehcixfer pool counter increases when
  you detach your device.
  
  I'm afraid I cannot help more as I am currently traveling :)
 
 Here's an alternative diff that removes xfers from the list of
 currently pending transfers as soon as they are finished or
 cancelled.  This should also prevent the race you analysed.
 
 With this diff ehci_idone() should no longer be called on a
 CANCELLED or TIMEOUT xfer which means the 'isdone' check can
 be removed.

I reverted the previous diff because it did not properly abort transfers
that were on the pipe's list but not on the interrupt list.

Diff below should take care of that and add an extra check for
suspend/resume.  I'm looking for test reports and oks.

I tried to keep the diff small, cleanups can be done afterward.

Index: ehci.c
===
RCS file: /cvs/src/sys/dev/usb/ehci.c,v
retrieving revision 1.176
diff -u -p -r1.176 ehci.c
--- ehci.c  6 Mar 2015 22:53:03 -   1.176
+++ ehci.c  9 Mar 2015 16:03:56 -
@@ -206,11 +206,7 @@ void   ehci_dump_exfer(struct ehci_xfer *
 #define ehci_add_intr_list(sc, ex) \
TAILQ_INSERT_TAIL((sc)-sc_intrhead, (ex), inext);
 #define ehci_del_intr_list(sc, ex

vlan+bridge stack integration

2015-03-12 Thread Martin Pieuchot
I'm progressively changing how pseudo-drivers are plugged into our
network stack with the goal of turning them MP-safe.

The diff below is a simple refactoring and should not introduce any
behavior change.  It moves a bridge-specific vlan-related chunk of
code into the bridge(4) driver.

I'd like to hear from people with a simple, complicated or even weird
bridge+vlan setup :)

I also appreciate comments and oks!

Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.232
diff -u -p -r1.232 if_bridge.c
--- net/if_bridge.c 6 Feb 2015 22:10:43 -   1.232
+++ net/if_bridge.c 11 Mar 2015 14:32:05 -
@@ -117,6 +117,8 @@ voidbridge_localbroadcast(struct bridge
 struct ether_header *, struct mbuf *);
 void   bridge_span(struct bridge_softc *, struct ether_header *,
 struct mbuf *);
+struct mbuf *bridge_dispatch(struct bridge_iflist *, struct ifnet *,
+struct ether_header *, struct mbuf *);
 void   bridge_stop(struct bridge_softc *);
 void   bridge_init(struct bridge_softc *);
 intbridge_bifconf(struct bridge_softc *, struct ifbifconf *);
@@ -1299,10 +1301,10 @@ struct mbuf *
 bridge_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m)
 {
struct bridge_softc *sc;
-   int s;
-   struct bridge_iflist *ifl, *srcifl;
-   struct arpcom *ac;
-   struct mbuf *mc;
+   struct bridge_iflist *ifl;
+#if NVLAN  0
+   uint16_t etype = ntohs(eh-ether_type);
+#endif /* NVLAN  0 */
 
/*
 * Make sure this interface is a bridge member.
@@ -1328,6 +1330,31 @@ bridge_input(struct ifnet *ifp, struct e
 
bridge_span(sc, eh, m);
 
+   m = bridge_dispatch(ifl, ifp, eh, m);
+
+#if NVLAN  0
+   if ((m != NULL)  ((m-m_flags  M_VLANTAG) ||
+   etype == ETHERTYPE_VLAN || etype == ETHERTYPE_QINQ)) {
+   /* The bridge did not want the vlan frame either, drop it. */
+   ifp-if_noproto++;
+   m_freem(m);
+   m = NULL;
+   }
+#endif /* NVLAN  0 */
+
+   return (m);
+}
+
+struct mbuf *
+bridge_dispatch(struct bridge_iflist *ifl, struct ifnet *ifp,
+struct ether_header *eh, struct mbuf *m)
+{
+   struct bridge_softc *sc = ifl-bridge_sc;
+   struct bridge_iflist *srcifl;
+   struct arpcom *ac;
+   struct mbuf *mc;
+   int s;
+
if (m-m_flags  (M_BCAST | M_MCAST)) {
/*
 * Reserved destination MAC addresses (01:80:C2:00:00:0x)
@@ -1377,6 +1404,7 @@ bridge_input(struct ifnet *ifp, struct e
IF_ENQUEUE(sc-sc_if.if_snd, mc);
splx(s);
schednetisr(NETISR_BRIDGE);
+#if NGIF  0
if (ifp-if_type == IFT_GIF) {
TAILQ_FOREACH(ifl, sc-sc_iflist, next) {
if (ifl-ifp-if_type == IFT_ETHER)
@@ -1396,6 +1424,7 @@ bridge_input(struct ifnet *ifp, struct e
m = NULL;
}
}
+#endif /* NGIF */
return (m);
}
 
@@ -1446,11 +1475,13 @@ bridge_input(struct ifnet *ifp, struct e
 
m-m_pkthdr.rcvif = ifl-ifp;
m-m_pkthdr.ph_rtableid = ifl-ifp-if_rdomain;
+#if NGIF  0
if (ifp-if_type == IFT_GIF) {
m-m_flags |= M_PROTO1;
ether_input(ifl-ifp, eh, m);
m = NULL;
}
+#endif /* NGIF */
return (m);
}
if (bcmp(ac-ac_enaddr, eh-ether_shost, ETHER_ADDR_LEN) == 0
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.189
diff -u -p -r1.189 if_ethersubr.c
--- net/if_ethersubr.c  16 Feb 2015 18:24:02 -  1.189
+++ net/if_ethersubr.c  11 Mar 2015 11:06:54 -
@@ -563,16 +563,6 @@ ether_input(struct ifnet *ifp0, void *hd
}
 #endif
 
-#if NVLAN  0
-   if ((m-m_flags  M_VLANTAG) || etype == ETHERTYPE_VLAN ||
-   etype == ETHERTYPE_QINQ) {
-   /* The bridge did not want the vlan frame either, drop it. */
-   ifp-if_noproto++;
-   m_freem(m);
-   return (1);
-   }
-#endif /* NVLAN  0 */
-
 #if NCARP  0
if (ifp-if_carp) {
if (ifp-if_type != IFT_CARP  (carp_input(ifp, eh, m) == 0))



Re: Async upd(4)

2015-03-09 Thread Martin Pieuchot
On 05/03/15(Thu) 12:25, David Higgs wrote:
 
 On Mar 3, 2015, at 8:44 AM, David Higgs hig...@gmail.com wrote:
 
  With much help from mpi@, I have made a first big step towards improving 
  upd(4).  I’m not sure when tree lock ends, but I’m still happy to accept 
  feedback if right now isn’t the time to commit.  There’s plenty more to do, 
  but I’d like to get this ironed out before moving on.
  
  New behavior with this diff:
  - Leverage new USB async reporting (must have up-to-date tree)
  - Sensor dependencies ensure reports are gathered in useful order
  - Track pending reports to minimize USB queries
  - Relevant sensors immediately invalidated when battery is removed (instead 
  of waiting for the next refresh)
  
  Things that may need sanity checks:
  - Simplified upd_attach; the old code seemed to have redundant / confusing 
  logic
  - Integer promotion/truncation with batpres and hdata/adjust variables
 
 Below is an overhauled diff with some additional considerations I came up 
 with.
 
 Improvements on the previous version:
 - ACPresent no longer depends on BatteryPresent
 - Sensor dependencies now handled manually, so dynamic behavior can be added 
 later.
 - Avoid hypothetical cases where certain USB report layouts could trigger:
   - Infinite loops in sensor dependencies.
   - Updating sensor contents using stale information.
   - Unnecessary sensor invalidation.
   - Redundant USB transfers.
 
 As before, comments, questions, and feedback are welcome.

That's great, some comments inline.

  int  upd_match(struct device *, void *, void *);
  void upd_attach(struct device *, struct device *, void *);
  int  upd_detach(struct device *, int);
  
  void upd_refresh(void *);
 -void upd_update_sensors(struct upd_softc *, uint8_t *, unsigned int, int);
 -void upd_intr(struct uhidev *, void *, uint);
 +void upd_request_sensor_refresh(struct upd_softc *, uint8_t, uint8_t, int);
 +void upd_update_sensor_cb(void *, int, void *, int);
 +void upd_mark_sensor_invalid(struct upd_softc *, struct upd_sensor *, int);
  struct upd_usage_entry *upd_lookup_usage_entry(const struct hid_item *);
  struct upd_sensor *upd_lookup_sensor(struct upd_softc *, int, int);
 +int upd_battery_present(struct upd_softc *);
 +void upd_update_batpres_sensor(struct upd_softc *, struct upd_sensor *,
 +uint8_t *, int);
 +void upd_update_batdep_sensor(struct upd_softc *, struct upd_sensor *,
 +uint8_t *, int);
 +void upd_update_sensor_value(struct upd_softc *, struct upd_sensor *,
 +uint8_t *, int);
 +void upd_intr(struct uhidev *, void *, uint);
 +
 +static struct upd_usage_entry upd_usage_table[] = {
 + { HUP_BATTERY,  HUB_REL_STATEOF_CHARGE,
 + SENSOR_PERCENT, RelativeStateOfCharge,
 + upd_update_batdep_sensor },
 + { HUP_BATTERY,  HUB_ABS_STATEOF_CHARGE,
 + SENSOR_PERCENT, AbsoluteStateOfCharge,
 + upd_update_batdep_sensor },
 + { HUP_BATTERY,  HUB_REM_CAPACITY,
 + SENSOR_PERCENT, RemainingCapacity,
 + upd_update_batdep_sensor },
 + { HUP_BATTERY,  HUB_FULLCHARGE_CAPACITY,
 + SENSOR_PERCENT, FullChargeCapacity,
 + upd_update_batdep_sensor },
 + { HUP_BATTERY,  HUB_CHARGING,
 + SENSOR_INDICATOR,   Charging,
 + upd_update_batdep_sensor },
 + { HUP_BATTERY,  HUB_DISCHARGING,
 + SENSOR_INDICATOR,   Discharging,
 + upd_update_batdep_sensor },
 + { HUP_BATTERY,  HUB_BATTERY_PRESENT,
 + SENSOR_INDICATOR,   BatteryPresent,
 + upd_update_batpres_sensor },
 + { HUP_POWER,HUP_SHUTDOWN_IMMINENT,
 + SENSOR_INDICATOR,   ShutdownImminent,
 + upd_update_sensor_value },
 + { HUP_BATTERY,  HUB_AC_PRESENT,
 + SENSOR_INDICATOR,   ACPresent,
 + upd_update_batdep_sensor },
 + { HUP_BATTERY,  HUB_ATRATE_TIMETOFULL,
 + SENSOR_TIMEDELTA,   AtRateTimeToFull,
 + upd_update_batdep_sensor }
 +};

I see that all the HUP_BATTERY sensors have the same dependency.   Why
not have a table for items without dependency (the parents) and a child
table per parent?  This way you would have only one upd_update_sensor
function.  This is really the key of this driver.  With a flat list like
you have right now you need much more code, flags and custom functions.

  
  struct cfdriver upd_cd = {
   NULL, upd, DV_DULL
 @@ -183,38 +207,30 @@ upd_attach(struct device *parent, struct
sc-sc_num_sensors  sc-sc_max_sensors; ) {
   DPRINTF((upd: repid=%d\n, item.report_ID));
   if (item.kind != hid_feature ||
 - item.report_ID  0)
 + item.report_ID  0 ||
 + item.report_ID = sc-sc_max_repid)
   continue;
 -
   if ((entry = upd_lookup_usage_entry(item)) == NULL)
   continue;
 -
 - /* filter repeated usages, avoid duplicated sensors */
 - sensor = upd_lookup_sensor(sc, entry-usage_pg,
 -

cdce(4), cue(4), kue(4) and mos(4)

2015-03-26 Thread Martin Pieuchot
This spring the new trend has a name: if_input() !

If you are the owner of one of these USB Ethernet dongles, please do
me a favor and make sure it still works as expected with the diff
below.

Martin

Index: if_cdce.c
===
RCS file: /cvs/src/sys/dev/usb/if_cdce.c,v
retrieving revision 1.63
diff -u -p -r1.63 if_cdce.c
--- if_cdce.c   14 Mar 2015 03:38:49 -  1.63
+++ if_cdce.c   17 Mar 2015 21:45:15 -
@@ -726,6 +726,7 @@ cdce_rxeof(struct usbd_xfer *xfer, void 
struct cdce_softc   *sc = c-cdce_sc;
struct ifnet*ifp = GET_IFP(sc);
struct mbuf *m;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
int  total_len = 0;
int  s;
 
@@ -767,25 +768,16 @@ cdce_rxeof(struct usbd_xfer *xfer, void 
}
 
ifp-if_ipackets++;
-
m-m_pkthdr.len = m-m_len = total_len;
-   m-m_pkthdr.rcvif = ifp;
-
-   s = splnet();
+   ml_enqueue(ml, m);
 
if (cdce_newbuf(sc, c, NULL) == ENOBUFS) {
ifp-if_ierrors++;
-   goto done1;
+   goto done;
}
 
-#if NBPFILTER  0
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   ether_input_mbuf(ifp, m);
-
-done1:
+   s = splnet();
+   if_input(ifp, ml);
splx(s);
 
 done:
Index: if_cdcef.c
===
RCS file: /cvs/src/sys/dev/usb/if_cdcef.c,v
retrieving revision 1.35
diff -u -p -r1.35 if_cdcef.c
--- if_cdcef.c  22 Dec 2014 02:28:52 -  1.35
+++ if_cdcef.c  17 Mar 2015 21:43:23 -
@@ -361,6 +361,7 @@ cdcef_rxeof(struct usbf_xfer *xfer, void
struct cdcef_softc  *sc = priv;
int total_len = 0;
struct ifnet*ifp = GET_IFP(sc);
+   struct mbuf_listml = MBUF_LIST_INITIALIZER();
struct mbuf *m = NULL;
 
 
@@ -403,32 +404,24 @@ cdcef_rxeof(struct usbf_xfer *xfer, void
goto done;
}
 
-   s = splnet();
if (ifp-if_flags  IFF_RUNNING) {
m = cdcef_newbuf();
if (m == NULL) {
/* message? */
ifp-if_ierrors++;
-   goto done1;
+   goto done;
}
 
m-m_pkthdr.len = m-m_len = total_len;
bcopy(sc-sc_buffer_out, mtod(m, char *), total_len);
-   m-m_pkthdr.rcvif = ifp;
 
ifp-if_ipackets++;
-
-#if NBPFILTER  0
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   ether_input_mbuf(ifp, m);
+   ml_enqueue(ml, m);
}
 
-done1:
+   s = splnet();
+   if_input(ifp, ml);
splx(s);
-
 done:
/* Setup another xfer. */
usbf_setup_xfer(xfer, sc-sc_pipe_out, sc, sc-sc_buffer_out,
Index: if_cue.c
===
RCS file: /cvs/src/sys/dev/usb/if_cue.c,v
retrieving revision 1.69
diff -u -p -r1.69 if_cue.c
--- if_cue.c14 Mar 2015 03:38:49 -  1.69
+++ if_cue.c17 Mar 2015 21:45:06 -
@@ -674,6 +674,7 @@ cue_rxeof(struct usbd_xfer *xfer, void *
struct cue_chain*c = priv;
struct cue_softc*sc = c-cue_sc;
struct ifnet*ifp = GET_IFP(sc);
+   struct mbuf_listml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
int total_len = 0;
u_int16_t   len;
@@ -721,26 +722,15 @@ cue_rxeof(struct usbd_xfer *xfer, void *
ifp-if_ipackets++;
m_adj(m, sizeof(u_int16_t));
m-m_pkthdr.len = m-m_len = total_len;
+   ml_enqueue(ml, m);
 
-   m-m_pkthdr.rcvif = ifp;
-
-   s = splnet();
-
-   /* XXX ugly */
if (cue_newbuf(sc, c, NULL) == ENOBUFS) {
ifp-if_ierrors++;
-   goto done1;
+   goto done;
}
 
-#if NBPFILTER  0
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   DPRINTFN(10,(%s: %s: deliver %d\n, sc-cue_dev.dv_xname,
-   __func__, m-m_len));
-   ether_input_mbuf(ifp, m);
- done1:
+   s = splnet();
+   if_input(ifp, ml);
splx(s);
 
 done:
Index: if_kue.c
===
RCS file: /cvs/src/sys/dev/usb/if_kue.c,v
retrieving revision 1.78
diff -u -p -r1.78 if_kue.c
--- if_kue.c14 Mar 2015 03:38:49 -  1.78
+++ if_kue.c17 Mar 2015 21:44:56 -
@@ -676,6 +676,7 @@ kue_rxeof(struct usbd_xfer *xfer, void *
struct kue_chain*c = priv;
struct kue_softc*sc = c-kue_sc;
struct ifnet*ifp = GET_IFP(sc);
+   struct mbuf_listml = MBUF_LIST_INITIALIZER();
   

ef(4), eg(4), el(4), ex(4) and ie(4)

2015-03-26 Thread Martin Pieuchot
Even our ISA Ethernet drivers can be converted to if_input().  If you
still use some of these, I'd appreciate test reports.

I'm asking here because Miod said everybody can test them... hum hum.

Alternatively, if you think some drivers can go away, I'll summon
tedu@.

Index: isa/if_ef_isapnp.c
===
RCS file: /cvs/src/sys/dev/isa/if_ef_isapnp.c,v
retrieving revision 1.27
diff -u -p -r1.27 if_ef_isapnp.c
--- isa/if_ef_isapnp.c  22 Dec 2014 02:28:51 -  1.27
+++ isa/if_ef_isapnp.c  26 Mar 2015 11:29:22 -
@@ -671,6 +671,7 @@ efread(sc)
bus_space_tag_t iot = sc-sc_iot;
bus_space_handle_t ioh = sc-sc_ioh;
struct ifnet *ifp = sc-sc_arpcom.ac_if;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
int len;
 
@@ -719,13 +720,9 @@ efread(sc)
}
 
ifp-if_ipackets++;
+   ml_enqueue(ml, m);
 
-#if NBPFILTER  0
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   ether_input_mbuf(ifp, m);
+   if_input(ifp, ml);
 }
 
 struct mbuf *
@@ -735,14 +732,12 @@ efget(sc, totlen)
 {
bus_space_tag_t iot = sc-sc_iot;
bus_space_handle_t ioh = sc-sc_ioh;
-   struct ifnet *ifp = sc-sc_arpcom.ac_if;
struct mbuf *top, **mp, *m;
int len, pad, s;
 
MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL)
return (NULL);
-   m-m_pkthdr.rcvif = ifp;
m-m_pkthdr.len = totlen;
pad = ALIGN(sizeof(struct ether_header)) - sizeof(struct ether_header);
m-m_data += pad;
Index: isa/if_eg.c
===
RCS file: /cvs/src/sys/dev/isa/if_eg.c,v
retrieving revision 1.36
diff -u -p -r1.36 if_eg.c
--- isa/if_eg.c 22 Dec 2014 02:28:51 -  1.36
+++ isa/if_eg.c 26 Mar 2015 11:28:42 -
@@ -670,8 +670,9 @@ void
 egread(struct eg_softc *sc, caddr_t buf, int len)
 {
struct ifnet *ifp = sc-sc_arpcom.ac_if;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
-   
+
if (len = sizeof(struct ether_header) ||
len  ETHER_MAX_LEN) {
printf(%s: invalid packet size %d; dropping\n,
@@ -688,17 +689,9 @@ egread(struct eg_softc *sc, caddr_t buf,
}
 
ifp-if_ipackets++;
+   ml_enqueue(ml, m);
 
-#if NBPFILTER  0
-   /*
-* Check if there's a BPF listener on this interface.
-* If so, hand off the raw packet to BPF.
-*/
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   ether_input_mbuf(ifp, m);
+   if_input(ifp, ml);
 }
 
 /*
@@ -707,14 +700,12 @@ egread(struct eg_softc *sc, caddr_t buf,
 struct mbuf *
 egget(struct eg_softc *sc, caddr_t buf, int totlen)
 {
-   struct ifnet *ifp = sc-sc_arpcom.ac_if;
struct mbuf *top, **mp, *m;
int len;
 
MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == 0)
return (0);
-   m-m_pkthdr.rcvif = ifp;
m-m_pkthdr.len = totlen;
len = MHLEN;
top = 0;
Index: isa/if_el.c
===
RCS file: /cvs/src/sys/dev/isa/if_el.c,v
retrieving revision 1.24
diff -u -p -r1.24 if_el.c
--- isa/if_el.c 22 Dec 2014 02:28:51 -  1.24
+++ isa/if_el.c 26 Mar 2015 11:28:38 -
@@ -490,6 +490,7 @@ elread(sc, len)
int len;
 {
struct ifnet *ifp = sc-sc_arpcom.ac_if;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
 
if (len = sizeof(struct ether_header) ||
@@ -508,17 +509,9 @@ elread(sc, len)
}
 
ifp-if_ipackets++;
+   ml_enqueue(ml, m);
 
-#if NBPFILTER  0
-   /*
-* Check if there's a BPF listener on this interface.
-* If so, hand off the raw packet to BPF.
-*/
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   ether_input_mbuf(ifp, m);
+   if_input(ifp, ml);
 }
 
 /*
@@ -531,7 +524,6 @@ elget(sc, totlen)
struct el_softc *sc;
int totlen;
 {
-   struct ifnet *ifp = sc-sc_arpcom.ac_if;
int iobase = sc-sc_iobase;
struct mbuf *top, **mp, *m;
int len;
@@ -539,7 +531,6 @@ elget(sc, totlen)
MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == 0)
return 0;
-   m-m_pkthdr.rcvif = ifp;
m-m_pkthdr.len = totlen;
len = MHLEN;
top = 0;
Index: isa/if_ex.c
===
RCS file: /cvs/src/sys/dev/isa/if_ex.c,v
retrieving revision 1.37
diff -u -p -r1.37 if_ex.c
--- isa/if_ex.c 22 Dec 2014 02:28:51 -  1.37
+++ isa/if_ex.c 26 Mar 2015 11:24:27 -
@@ -639,6 +639,7 @@ void 
 ex_rx_intr(struct ex_softc *sc)
 {
struct ifnet *ifp = sc-arpcom.ac_if;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
int rx_status, 

Re: ef(4), eg(4), el(4), ex(4) and ie(4)

2015-03-26 Thread Martin Pieuchot
On 26/03/15(Thu) 08:00, Ted Unangst wrote:
 Martin Pieuchot wrote:
  Even our ISA Ethernet drivers can be converted to if_input().  If you
  still use some of these, I appreciate test reports.
  
  I'm asking here because Miod said everybody can test them... hum hum.
  
  Alternatively, if you think some drivers can go away, I'll summon
  tedu@.
 
 What, no ec? No ep? Why play favorites???

You're asking for tricky ones!

Enjoy :)


Index: dev/ic/elink3.c
===
RCS file: /cvs/src/sys/dev/ic/elink3.c,v
retrieving revision 1.83
diff -u -p -r1.83 elink3.c
--- dev/ic/elink3.c 14 Mar 2015 03:38:47 -  1.83
+++ dev/ic/elink3.c 26 Mar 2015 12:26:41 -
@@ -1243,8 +1243,9 @@ epread(struct ep_softc *sc)
bus_space_tag_t iot = sc-sc_iot;
bus_space_handle_t ioh = sc-sc_ioh;
struct ifnet *ifp = sc-sc_arpcom.ac_if;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
-   int len;
+   int len, error = 0;
 
len = bus_space_read_2(iot, ioh, ep_w1_reg(sc, EP_W1_RX_STATUS));
 
@@ -1275,11 +1276,12 @@ again:
 #endif
 
if (len  ERR_INCOMPLETE)
-   return;
+   goto done;
 
if (len  ERR_RX) {
++ifp-if_ierrors;
-   goto abort;
+   error = 1;
+   goto done;
}
 
len = RX_BYTES_MASK;   /* Lower 11 bits = RX bytes. */
@@ -1288,21 +1290,13 @@ again:
m = epget(sc, len);
if (m == NULL) {
ifp-if_ierrors++;
-   goto abort;
+   error = 1;
+   goto done;
}
 
++ifp-if_ipackets;
 
-#if NBPFILTER  0
-   /*
-* Check if there's a BPF listener on this interface.
-* If so, hand off the raw packet to BPF.
-*/
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   ether_input_mbuf(ifp, m);
+   ml_enqueue(ml, m);
 
/*
 * In periods of high traffic we can actually receive enough
@@ -1331,15 +1325,14 @@ again:
sc-sc_dev.dv_xname);
 #endif
epreset(sc);
-   return;
+   goto done;
}
goto again;
}
-
-   return;
-
-abort:
-   ep_discard_rxtop(iot, ioh);
+done:
+   if (error)
+   ep_discard_rxtop(iot, ioh);
+   if_input(ifp, ml);
 }
 
 struct mbuf *
@@ -1347,7 +1340,6 @@ epget(struct ep_softc *sc, int totlen)
 {
bus_space_tag_t iot = sc-sc_iot;
bus_space_handle_t ioh = sc-sc_ioh;
-   struct ifnet *ifp = sc-sc_arpcom.ac_if;
struct mbuf *m;
caddr_t data;
int len, pad, off, sh, rxreg;
@@ -1368,7 +1360,6 @@ epget(struct ep_softc *sc, int totlen)
sc-next_mb = (sc-next_mb + 1) % MAX_MBS;
 
len = MCLBYTES;
-   m-m_pkthdr.rcvif = ifp;
m-m_pkthdr.len = totlen;
m-m_len = totlen;
pad = ALIGN(sizeof(struct ether_header)) - sizeof(struct ether_header);
Index: dev/ic/dp8390.c
===
RCS file: /cvs/src/sys/dev/ic/dp8390.c,v
retrieving revision 1.49
diff -u -p -r1.49 dp8390.c
--- dev/ic/dp8390.c 14 Mar 2015 03:38:47 -  1.49
+++ dev/ic/dp8390.c 26 Mar 2015 12:35:46 -
@@ -867,6 +867,7 @@ void
 dp8390_read(struct dp8390_softc *sc, int buf, u_short len)
 {
struct ifnet *ifp = sc-sc_arpcom.ac_if;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
 
/* Pull packet off interface. */
@@ -877,17 +878,9 @@ dp8390_read(struct dp8390_softc *sc, int
}
 
ifp-if_ipackets++;
+   ml_enqueue(ml, m);
 
-#if NBPFILTER  0
-   /*
-* Check if there's a BPF listener on this interface.
-* If so, hand off the raw packet to bpf.
-*/
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   ether_input_mbuf(ifp, m);
+   if_input(ifp, ml);
 }
 
 
@@ -947,14 +940,12 @@ dp8390_getmcaf(struct arpcom *ac, u_int8
 struct mbuf *
 dp8390_get(struct dp8390_softc *sc, int src, u_short total_len)
 {
-   struct ifnet *ifp = sc-sc_arpcom.ac_if;
struct mbuf *m, *m0, *newm;
u_short len;
 
MGETHDR(m0, M_DONTWAIT, MT_DATA);
if (m0 == NULL)
return (0);
-   m0-m_pkthdr.rcvif = ifp;
m0-m_pkthdr.len = total_len;
len = MHLEN;
m = m0;



Small ifconfig output tweak for inet6?

2015-03-26 Thread Martin Pieuchot
How do people feel about printing the prefixlen in CIDR notation?  I'm
annoyed about outputs not fitting in 80 chars when using autoconf magic:

-inet6 fd00::f2de:f1ff:fe6a:15d1 prefixlen 64 autoconf pltime 3594 vltime 7194
+inet6 fd00::f2de:f1ff:fe6a:15d1/64 autoconf pltime 3594 vltime 7194

While here can I convert  autoconfprivacy to  privacy or +privacy?

Index: ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.296
diff -u -p -r1.296 ifconfig.c
--- ifconfig.c  5 Feb 2015 10:30:25 -   1.296
+++ ifconfig.c  26 Mar 2015 12:15:10 -
@@ -3192,7 +3192,7 @@ in6_alias(struct in6_ifreq *creq)
warn(SIOCGIFNETMASK_IN6);
} else {
sin6 = (struct sockaddr_in6 *)ifr6.ifr_addr;
-   printf( prefixlen %d, prefix(sin6-sin6_addr,
+   printf(/%d, prefix(sin6-sin6_addr,
sizeof(struct in6_addr)));
}
 
@@ -3216,7 +3216,7 @@ in6_alias(struct in6_ifreq *creq)
if (ifr6.ifr_ifru.ifru_flags6  IN6_IFF_AUTOCONF)
printf( autoconf);
if (ifr6.ifr_ifru.ifru_flags6  IN6_IFF_PRIVACY)
-   printf( autoconfprivacy);
+   printf( privacy);
}
 
if (scopeid)



kernel with gif(4)

2015-04-01 Thread Martin Pieuchot
I'd like to be able to easily identify how deep the tentacles of a
pseudo-driver reach into your network stack.  This diff takes the example
of gif(4) and moves all the remaining blocks checking for IFT_GIF under
the appropriate #ifdef dance.

The #ifdefs I'm adding below are not strictly needed to compile a kernel
without gif(4) but they act as markers to delimit code chunks that do not
need to be executed without the corresponding interface.

Ok?

Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.232
diff -u -p -r1.232 if_bridge.c
--- net/if_bridge.c 6 Feb 2015 22:10:43 -   1.232
+++ net/if_bridge.c 1 Apr 2015 12:00:11 -
@@ -1377,6 +1377,7 @@ bridge_input(struct ifnet *ifp, struct e
IF_ENQUEUE(sc-sc_if.if_snd, mc);
splx(s);
schednetisr(NETISR_BRIDGE);
+#if NGIF  0
if (ifp-if_type == IFT_GIF) {
TAILQ_FOREACH(ifl, sc-sc_iflist, next) {
if (ifl-ifp-if_type == IFT_ETHER)
@@ -1396,6 +1397,7 @@ bridge_input(struct ifnet *ifp, struct e
m = NULL;
}
}
+#endif /* NGIF */
return (m);
}
 
@@ -1446,11 +1448,13 @@ bridge_input(struct ifnet *ifp, struct e
 
m-m_pkthdr.rcvif = ifl-ifp;
m-m_pkthdr.ph_rtableid = ifl-ifp-if_rdomain;
+#if NGIF  0
if (ifp-if_type == IFT_GIF) {
m-m_flags |= M_PROTO1;
ether_input(ifl-ifp, eh, m);
m = NULL;
}
+#endif /* NGIF */
return (m);
}
if (bcmp(ac-ac_enaddr, eh-ether_shost, ETHER_ADDR_LEN) == 0
Index: netinet6/in6_ifattach.c
===
RCS file: /cvs/src/sys/netinet6/in6_ifattach.c,v
retrieving revision 1.86
diff -u -p -r1.86 in6_ifattach.c
--- netinet6/in6_ifattach.c 14 Mar 2015 03:38:52 -  1.86
+++ netinet6/in6_ifattach.c 1 Apr 2015 11:58:55 -
@@ -30,6 +30,8 @@
  * SUCH DAMAGE.
  */
 
+#include gif.h
+
 #include sys/param.h
 #include sys/systm.h
 #include sys/socket.h
@@ -198,6 +200,7 @@ get_hw_ifid(struct ifnet *ifp, struct in
}
break;
 
+#if NGIF  0
case IFT_GIF:
/*
 * RFC2893 says: SHOULD use IPv4 address as ifid source.
@@ -206,6 +209,7 @@ get_hw_ifid(struct ifnet *ifp, struct in
 * we don't do this.
 */
return -1;
+#endif /* NGIF */
 
default:
return -1;
Index: netinet6/nd6.c
===
RCS file: /cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.133
diff -u -p -r1.133 nd6.c
--- netinet6/nd6.c  25 Mar 2015 17:39:33 -  1.133
+++ netinet6/nd6.c  1 Apr 2015 12:02:14 -
@@ -30,6 +30,8 @@
  * SUCH DAMAGE.
  */
 
+#include gif.h
+
 #include sys/param.h
 #include sys/systm.h
 #include sys/timeout.h
@@ -1817,7 +1819,10 @@ nd6_need_cache(struct ifnet *ifp)
case IFT_PROPVIRTUAL:
case IFT_IEEE80211:
case IFT_CARP:
-   case IFT_GIF:   /* XXX need more cases? */
+#if NGIF  0
+   case IFT_GIF:
+#endif /* NGIF */
+   /* XXX need more cases? */
return (1);
default:
return (0);



Re: Use LIST for upd(4) sensors

2015-04-01 Thread Martin Pieuchot
On 31/03/15(Tue) 23:06, David Higgs wrote:
 This was much more straightforward than expected.
 
 - Replace an array with a LIST of allocated sensors.
 - Remove or rescope variables counting sensors.
 - Allocated sensors are always attached.
 - Drop an unnecessary size calculation.

Do you need this for an upcoming change?  I fail to see the benefit of
having a malloc(9) call per item; afaik these items are really small and
you have at most 10 of them...

 Thanks.
 
 --david
 
 --- a/upd.c
 +++ b/upd.c
 @@ -23,6 +23,7 @@
  #include sys/kernel.h
  #include sys/malloc.h
  #include sys/device.h
 +#include sys/queue.h
  #include sys/sensors.h
  
  #include dev/usb/hid.h
 @@ -77,20 +78,18 @@ struct upd_report {
  struct upd_sensor {
   struct ksensor  ksensor;
   struct hid_item hitem;
 - int attached;
 + LIST_ENTRY(upd_sensor)  next;
  };
  
  struct upd_softc {
   struct uhidevsc_hdev;
 - int  sc_num_sensors;
   u_intsc_max_repid;
 - u_intsc_max_sensors;
  
   /* sensor framework */
   struct ksensordevsc_sensordev;
   struct sensor_task  *sc_sensortask;
   struct upd_report   *sc_reports;
 - struct upd_sensor   *sc_sensors;
 + LIST_HEAD(, upd_sensor)  sc_sensors;
  };
  
  int  upd_match(struct device *, void *, void *);
 @@ -155,14 +154,14 @@ upd_attach(struct device *parent, struct
   struct hid_data  *hdata;
   struct upd_usage_entry   *entry;
   struct upd_sensor*sensor;
 + int   num_sensors;
   int   size;
   void *desc;
  
   sc-sc_hdev.sc_intr = upd_intr;
   sc-sc_hdev.sc_parent = uha-parent;
   sc-sc_reports = NULL;
 - sc-sc_sensors = NULL;
 - sc-sc_max_sensors = nitems(upd_usage_table);
 + LIST_INIT(sc-sc_sensors);
  
   strlcpy(sc-sc_sensordev.xname, sc-sc_hdev.sc_dev.dv_xname,
   sizeof(sc-sc_sensordev.xname));
 @@ -173,14 +172,11 @@ upd_attach(struct device *parent, struct
  
   sc-sc_reports = mallocarray(sc-sc_max_repid,
   sizeof(struct upd_report), M_USBDEV, M_WAITOK | M_ZERO);
 - sc-sc_sensors = mallocarray(sc-sc_max_sensors,
 - sizeof(struct upd_sensor), M_USBDEV, M_WAITOK | M_ZERO);
 - size = sc-sc_max_sensors * sizeof(struct upd_sensor);
 - sc-sc_num_sensors = 0;
 + num_sensors = 0;
   uhidev_get_report_desc(uha-parent, desc, size);
   for (hdata = hid_start_parse(desc, size, hid_feature);
hid_get_item(hdata, item) 
 -  sc-sc_num_sensors  sc-sc_max_sensors; ) {
 +  num_sensors  nitems(upd_usage_table); ) {
   DPRINTF((upd: repid=%d\n, item.report_ID));
   if (item.kind != hid_feature ||
   item.report_ID  0 ||
 @@ -196,7 +192,8 @@ upd_attach(struct device *parent, struct
   if (sensor != NULL)
   continue;
  
 - sensor = sc-sc_sensors[sc-sc_num_sensors];
 + sensor = malloc(sizeof(struct upd_sensor), M_USBDEV,
 + M_WAITOK | M_ZERO);
   memcpy(sensor-hitem, item, sizeof(struct hid_item));
   strlcpy(sensor-ksensor.desc, entry-usage_name,
   sizeof(sensor-ksensor.desc));
 @@ -205,8 +202,8 @@ upd_attach(struct device *parent, struct
   sensor-ksensor.status = SENSOR_S_UNKNOWN;
   sensor-ksensor.value = 0;
   sensor_attach(sc-sc_sensordev, sensor-ksensor);
 - sensor-attached = 1;
 - sc-sc_num_sensors++;
 + LIST_INSERT_HEAD(sc-sc_sensors, sensor, next);
 + num_sensors++;
  
   if (sc-sc_reports[item.report_ID].enabled)
   continue;
 @@ -216,7 +213,7 @@ upd_attach(struct device *parent, struct
   sc-sc_reports[item.report_ID].enabled = 1;
   }
   hid_end_parse(hdata);
 - DPRINTF((upd: sc_num_sensors=%d\n, sc-sc_num_sensors));
 + DPRINTF((upd: num_sensors=%d\n, num_sensors));
  
   sc-sc_sensortask = sensor_task_register(sc, upd_refresh, 6);
   if (sc-sc_sensortask == NULL) {
 @@ -235,7 +232,7 @@ upd_detach(struct device *self, int flag
  {
   struct upd_softc*sc = (struct upd_softc *)self;
   struct upd_sensor   *sensor;
 - int  i;
 + struct upd_sensor   *t;
  
   if (sc-sc_sensortask != NULL) {
   wakeup(sc-sc_sensortask);
 @@ -244,15 +241,14 @@ upd_detach(struct device *self, int flag
  
   sensordev_deinstall(sc-sc_sensordev);
  
 - for (i = 0; i  sc-sc_num_sensors; i++) {
 - sensor = sc-sc_sensors[i];
 - if (sensor-attached)
 - sensor_detach(sc-sc_sensordev, sensor-ksensor);
 + LIST_FOREACH_SAFE(sensor, sc-sc_sensors, next, t) {
 + sensor_detach(sc-sc_sensordev, 

Re: add m_defrag to pcn driver

2015-04-01 Thread Martin Pieuchot
On 31/03/15(Tue) 21:56, Kimberley Manning wrote:
 Hi,
 
 This diff makes the pcn driver use m_defrag for fragmented mbuf chains,

I like this kind of cleanup.  As for vio(4), could you try the diff or
are you looking for testers?

Are you after something specific or are you changing the drivers for
coherency reasons?


 
 cheers,
 Kim
 
 Index: if_pcn.c
 ===
 RCS file: /cvs/src/sys/dev/pci/if_pcn.c,v
 retrieving revision 1.36
 diff -u -p -r1.36 if_pcn.c
 --- if_pcn.c  14 Mar 2015 03:38:48 -  1.36
 +++ if_pcn.c  27 Mar 2015 12:17:24 -
 @@ -851,25 +851,23 @@ pcn_start(struct ifnet *ifp)
* were short on resources.  In this case, we'll copy
* and try again.
*/
 - if (bus_dmamap_load_mbuf(sc-sc_dmat, dmamap, m0,
 - BUS_DMA_WRITE|BUS_DMA_NOWAIT) != 0) {
 - MGETHDR(m, M_DONTWAIT, MT_DATA);
 - if (m == NULL)
 - break;
 - if (m0-m_pkthdr.len  MHLEN) {
 - MCLGET(m, M_DONTWAIT);
 - if ((m-m_flags  M_EXT) == 0) {
 - m_freem(m);
 - break;
 - }
 - }
 - m_copydata(m0, 0, m0-m_pkthdr.len, mtod(m, caddr_t));
 - m-m_pkthdr.len = m-m_len = m0-m_pkthdr.len;
 - error = bus_dmamap_load_mbuf(sc-sc_dmat, dmamap,
 - m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
 - if (error)
 - break;
 - }
 +error = bus_dmamap_load_mbuf(sc-sc_dmat, dmamap, m0,
 +BUS_DMA_WRITE|BUS_DMA_NOWAIT);
 +switch (error) {
 +case 0:
 +break; 
 +case EFBIG:
 +if ((error = m_defrag(m0, M_DONTWAIT)) == 0 
 +(error = bus_dmamap_load_mbuf(sc-sc_dmat, 
 dmamap,
 + m0, BUS_DMA_WRITE|BUS_DMA_NOWAIT)) == 0)
 +break;
 +
 +/* FALLTHROUGH */
 +default:
 + IFQ_DEQUEUE(ifp-if_snd, m0);
 + m_freem(m);
 + continue;
 +}
  
   /*
* Ensure we have enough descriptors free to describe
 



if_input() and `rcvif`

2015-04-01 Thread Martin Pieuchot
When if_input_process() passes an mbuf to pseudo-interface handlers,
they will change the `rcvif` pointer in the packet header.  That's why
we should not pass an ifp pointer to the handlers and instead let them
look at the value of `rcvif`.

The diff below changes if_input() and ether_input() to no longer take an
ifp as argument.

Ok?

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.325
diff -u -p -r1.325 if.c
--- net/if.c1 Apr 2015 04:00:55 -   1.325
+++ net/if.c1 Apr 2015 14:09:39 -
@@ -496,7 +496,7 @@ if_input_process(void *xmq)
 
ifp = m-m_pkthdr.rcvif;
SLIST_FOREACH(ifih, ifp-if_inputs, ifih_next) {
-   if ((*ifih-ifih_input)(ifp, NULL, m))
+   if ((*ifih-ifih_input)(m, NULL))
break;
}
}
Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.232
diff -u -p -r1.232 if_bridge.c
--- net/if_bridge.c 6 Feb 2015 22:10:43 -   1.232
+++ net/if_bridge.c 1 Apr 2015 14:11:00 -
@@ -1391,7 +1391,7 @@ bridge_input(struct ifnet *ifp, struct e
BPF_DIRECTION_IN);
 #endif
m-m_flags |= M_PROTO1;
-   ether_input(ifl-ifp, eh, m);
+   ether_input(m, eh);
ifl-ifp-if_ipackets++;
m = NULL;
}
@@ -1448,7 +1448,7 @@ bridge_input(struct ifnet *ifp, struct e
m-m_pkthdr.ph_rtableid = ifl-ifp-if_rdomain;
if (ifp-if_type == IFT_GIF) {
m-m_flags |= M_PROTO1;
-   ether_input(ifl-ifp, eh, m);
+   ether_input(m, eh);
m = NULL;
}
return (m);
@@ -1624,7 +1624,7 @@ bridge_localbroadcast(struct bridge_soft
BPF_DIRECTION_IN);
 #endif
 
-   ether_input(ifp, NULL, m1);
+   ether_input(m1, NULL);
ifp-if_ipackets++;
 }
 
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.190
diff -u -p -r1.190 if_ethersubr.c
--- net/if_ethersubr.c  17 Mar 2015 14:51:27 -  1.190
+++ net/if_ethersubr.c  1 Apr 2015 14:11:13 -
@@ -454,15 +454,15 @@ bad:
  * the ether header, which is provided separately.
  */
 int
-ether_input(struct ifnet *ifp0, void *hdr, struct mbuf *m)
+ether_input(struct mbuf *m, void *hdr)
 {
+   struct ifnet *ifp0, *ifp;
struct ether_header *eh = hdr;
struct ifqueue *inq;
u_int16_t etype;
int s, llcfound = 0;
struct llc *l;
struct arpcom *ac;
-   struct ifnet *ifp = ifp0;
 #if NTRUNK  0
int i = 0;
 #endif
@@ -470,7 +470,9 @@ ether_input(struct ifnet *ifp0, void *hd
struct ether_header *eh_tmp;
 #endif
 
+
/* mark incoming routing table */
+   ifp = ifp0 = m-m_pkthdr.rcvif;
m-m_pkthdr.ph_rtableid = ifp-if_rdomain;
 
if (eh == NULL) {
Index: net/if_vlan.c
===
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.113
diff -u -p -r1.113 if_vlan.c
--- net/if_vlan.c   31 Mar 2015 11:47:09 -  1.113
+++ net/if_vlan.c   1 Apr 2015 14:05:23 -
@@ -352,7 +352,7 @@ vlan_input(struct ether_header *eh, stru
}
 
ifv-ifv_if.if_ipackets++;
-   ether_input(ifv-ifv_if, eh, m);
+   ether_input(m, eh);
 
return (0);
 }
Index: net/if_vxlan.c
===
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.22
diff -u -p -r1.22 if_vxlan.c
--- net/if_vxlan.c  14 Mar 2015 03:38:51 -  1.22
+++ net/if_vxlan.c  1 Apr 2015 14:05:36 -
@@ -537,7 +537,7 @@ vxlan_lookup(struct mbuf *m, struct udph
 #endif
 
ifp-if_ipackets++;
-   ether_input(ifp, eh, m);
+   ether_input(m, eh);
 
/* success */
return (1);
Index: net/if_var.h
===
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.23
diff -u -p -r1.23 if_var.h
--- net/if_var.h1 Apr 2015 04:00:55 -   1.23
+++ net/if_var.h1 Apr 2015 14:09:30 -
@@ -114,7 +114,7 @@ struct  ifqueue {
  */
 struct ifih {
SLIST_ENTRY(ifih) ifih_next;
-   int (*ifih_input)(struct ifnet *, void *, struct mbuf *);
+   int (*ifih_input)(struct mbuf *, void *);
 };
 
 /*
@@ -422,7 +422,7 @@ voidether_input_mbuf(struct ifnet *, st
 void   

Re: Small ifconfig output tweak for inet6?

2015-03-26 Thread Martin Pieuchot
On 26/03/15(Thu) 17:46, Henning Brauer wrote:
 * Mike Belopuhov m...@belopuhov.com [2015-03-26 14:36]:
  On 26 March 2015 at 14:27, Stuart Henderson st...@openbsd.org wrote:
   seems reasonable. (I'd quite like that for v4 too, though it wouldn't
   cope with non-contiguous netmask ;)
  non-contiguous netmasks for IPv4 addresses configured on an interface?
  is that possible?  what's the use case?
  perhaps you're confusing this with  non-contiguous netmasks in the radix
  tree that are entered by the ipsec flows containing port numbers?
 
 I don't think we need to worry about non-contiguous netmasks here.

My plan is to stop supporting them in the routing table first...  Does
that ring any bell? :o)

So let's start simple, CIDR notation for IPv6, ok?

Index: ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.296
diff -u -p -r1.296 ifconfig.c
--- ifconfig.c  5 Feb 2015 10:30:25 -   1.296
+++ ifconfig.c  26 Mar 2015 17:15:54 -
@@ -3192,7 +3192,7 @@ in6_alias(struct in6_ifreq *creq)
warn(SIOCGIFNETMASK_IN6);
} else {
sin6 = (struct sockaddr_in6 *)ifr6.ifr_addr;
-   printf( prefixlen %d, prefix(sin6-sin6_addr,
+   printf(/%d, prefix(sin6-sin6_addr,
sizeof(struct in6_addr)));
}
 



Re: ef(4), eg(4), el(4), ex(4) and ie(4)

2015-03-31 Thread Martin Pieuchot
On 31/03/15(Tue) 18:06, Miod Vallat wrote:
   What, no ec? No ep? Why play favorites???
  
  You're asking for tricky ones!
  
  Enjoy :)
 
 I'm afraid my ISA ec(4) seems to no longer work. Blinks red during POST,
 doesn't get detected by the kernel (yes, it's jumped correctly).

Does that mean we can cvs rm the driver? :)



Re: touchpad slight regression (snap: 20141121-20150217)

2015-02-28 Thread Martin Pieuchot
[moved to tech@]

On 27/02/15(Fri) 11:40, patrick keshishian wrote:
 Hi,
 
 On 2/26/15, Ulf Brosziewski ulf.brosziew...@t-online.de wrote:
  On 02/27/2015 03:31 AM, Ulf Brosziewski wrote:
  ...
  It might be that the following patch to wsmouse.c solves the problem
  with the new version of wsconscomm. Tests would be welcome (I could
  only verify that the patch does no harm to other touchpad types, i.e.,
  Elantech-v4 and Alps Glidepoint).
 [...]
 
  Sorry, the change was in the wrong place and would only do a half of
  the work. It should look like:
 
  Index: wsmouse.c
  ===
  RCS file: /cvs/src/sys/dev/wscons/wsmouse.c,v
  retrieving revision 1.26
  diff -u -p -r1.26 wsmouse.c
  --- wsmouse.c   27 Oct 2014 13:55:05 -  1.26
  +++ wsmouse.c   27 Feb 2015 02:50:06 -
  @@ -433,6 +433,9 @@ wsmouse_input(struct device *wsmousedev,
  }
  }
 
  +   if (sc-sc_z == 0)
  +   sc-sc_w = INVALID_W;
  +
  mb = sc-sc_mb;
  while ((d = mb ^ ub) != 0) {
  /*
 
 I can confirm this change alone causes no adverse, observable
 change on my x120e's touchpad.

In the long term, I think that having a similar logic in wsmouse_input()
makes sense.  As I already told Ulf, it would be really nice to improve
the wsmouse(4)/wscons(4) layer to support modern touchpad features and
get rid of wsconscomm.

But right now this chunk is really intrusive.  Plus I believe it should
only be applied on touchpad ``native'' mode.  Sure, we could be able to
check for WSMOUSE_INPUT_ABSOLUTE_W, but can it be a bad idea?

What about putting a similar chunk in pms_proc_synaptics() instead?

 However, I would appreciate it if someone could enlighten me
 as to what the Z and W axis refer.

I'm glad you asked, because it really depends on which hardware you're
using :)  Plus right now our code uses magic values which are most of
the time not documented.  Diffs are more than welcome to improve the
situation.

Martin



Re: USBD_NO_COPY problems

2015-02-28 Thread Martin Pieuchot
On 19/02/15(Thu) 21:49, David Higgs wrote:
 On Feb 13, 2015, at 7:29 AM, David Higgs hig...@gmail.com wrote:
  On Friday, February 13, 2015, Martin Pieuchot mpieuc...@nolizard.org 
  wrote:
  On 13/02/15(Fri) 00:28, David Higgs wrote:
   I guess nobody else has tried calling uhidev_get_report_async() yet.  :)
  
   First I was getting a NULL pointer deref in the uhidev async callback.
   Then I realized that due to USBD_NO_COPY, xfer-buffer was always
   NULL.  Next, I tried to use the DMA buffer, but I ended up in DDB in a
   very cryptic way.  I believe this is because the DMA buffer isn't
   available when the callback is invoked.
  
   For the async callback to get a valid dmabuf, it needs to be invoked
   prior to usb_freemem() in usbd_transfer_complete().  The xfer-status
   determination would need to move up too.  I'd do this myself but I
   don't understand the logic and ordering of pipe-repeat stuff, and am
   concerned about unintentionally breaking other devices.
  
   This is partially my fault, because I tested the original diff that
   added the USBD_NO_COPY semantics to verify that it didn't break my
   synchronous code paths, but hadn't yet written anything for upd(4) to
   check the async ones.
  
  Does the diff below help? 
  
  Partially but not enough.  I had already figured out that I needed that to 
  solve the NULL pointer dereference.  See my 2nd paragraph above.
  
 OK, I figured out my issue - the crazy DDB backtrace is produced when you 
 execute a NULL callback.
 
 It still doesn’t seem legal for the callback to access DMA buffer contents 
 after they are “freed”.  I assume this won’t work in all cases (host 
 controllers / architectures / cache behaviors), but I don’t experience any 
 problems in my i386 VM.  I tried reordering parts of 
 usbd_transfer_complete(), but DIAGNOSTIC code became very unhappy with the 
 results.
 
 Fortunately, the diff below doesn’t touch that code path and just fixes the 
 uhidev layer.  My async upd(4) changes will be forthcoming in a different 
 thread.

Committed since nothing uses it at the moment.  Thanks!

Martin



Re: fix stuck cancelled ehci xfers (ehci_idone: ex=%p is done!)

2015-02-27 Thread Martin Pieuchot
On 27/02/15(Fri) 12:07, Stefan Sperling wrote:
 On Thu, Feb 26, 2015 at 10:45:01PM +0100, Stefan Sperling wrote:
  There are other issues which are not fixed by this diff and which
  my diff did not fix either.
 
  But these are bugs we can investigate another day. Happy travels!
 
 I suppose there might well be power supply issues when chaining too many
 passive hubs and devices off a single USB port on the host controller.
 I've now put an active USB hub (separate power source) at the end of
 my long-cable hub and so far things are looking good. I'll report back
 if my problems appear in this configuration, too.
 
 My setup changed from
 
 [ehci host]--[usb extension cable hub]--[usb keyboard (+hub)]--[usb mouse]
 
 to this:
  [power adapter]
|
V
 [ehci host]--[usb extension cable hub]--[active hub]--[usb keyboard (+hub)]
   ^
   |
   +--[usb mouse]

Well there's an obvious bug to fix in our stack regarding HUB power
sources, I don't know if that's the one you're hitting but we can try :)

When a device is attached the stack gets its power usage from the
configuration descriptor.  But the logic to check if the hub this device
is attached to has enough power budget is dumb and does not work well
with multiple devices below the same hub.

Instead of making a guess about how much power can be drained by a port
we should really keep track of the power available per hub. 

If you're curious grep for 'powersrc' in sys/dev/usb.

I have this on my TODO list because SuperSpeed devices do things
slightly differently.  I'll try to cook a diff, but it will be for after
5.7.



Re: fix stuck cancelled ehci xfers (ehci_idone: ex=%p is done!)

2015-02-27 Thread Martin Pieuchot
On 26/02/15(Thu) 22:45, Stefan Sperling wrote:
 On Thu, Feb 26, 2015 at 01:51:29AM +0100, Martin Pieuchot wrote:
  On 22/02/15(Sun) 00:54, Martin Pieuchot wrote:
   On 20/02/15(Fri) 23:15, Stefan Sperling wrote:
In the following configuration I can pretty easily trigger endless
screenfulls of scrolling ehci_idone: ex=%p is done! messages,
where %p is to a constant pointer value (same in each message).

[ehci host]--[usb extension cable (hub 1)]--[usb keyboard (hub 
2)]--[usb mouse]

So there's a USB extension cable (hub 1) that I plug a keyboard into.
The keyboard in turn has a built-in hub that has a mouse plugged into 
it.
The problem happens almost every time I pull the keyboard out of hub 1.

I've also seen it happen without any intervention on my part (In fact
I was in a different city and couldn't use the box until I got home
several hours later and hit the reset switch... that's no fun)

So if a transfer is cancelled (e.g. as a result of pulling the plug), 
then:

- usbd_abort_pipe wants to abort a related transfer
- ehci_abort_xfer schedules and waits for ehci_softintr, expecting
  the softintr routine to deal with the cancelled transfer:

/*
 * Step 3: Make sure the soft interrupt routine has run. This
 * should remove any completed items off the queue.
 * The hardware has no reference to completed items (TDs).
 * It's safe to remove them at any time.
 */
s = splusb();
sc-sc_softwake = 1;
usb_schedsoftintr(sc-sc_bus);
tsleep(sc-sc_softwake, PZERO, ehciab, 0);

- ehci_softintr gets scheduled
- ehci_softintr loops over xfers on the sc_intrhead TAILQ and
  invokes ehci_check_intr on each
- ehci_check_intr eventually ends up calling ehci_idone
- ehci_idone does nothing for cancelled transfers... ?!?

if (xfer-status == USBD_CANCELLED ||
xfer-status == USBD_TIMEOUT) {
DPRINTF((ehci_idone: aborted xfer=%p\n, xfer));
return;
}

- something else happens

- ehci_abort_xfer awakes from tsleep and sets ex-isdone, since it
  expects the softinterrupt routine to have dealt with the xfer

- something else happens

- the host controller sends an INTERR interrupt
- ehci_intr1 schedules ehci_softintr
- ehci_softintr loops over xfers on the sc_intrhead TAILQ and
  invokes ehci_check_intr on each
- the cancelled xfer is still in the intrhead TAILQ and ends up in 
ehci_idone
- ehci_idone looks for the isdone flag which is now set, then it
  complains and does nothing

- the host controller sends an INTERR interrupt
... same story again, we get an endless loop

This diff breaks the chain of events and fixes the endless loop for me.
I can't reproduce the problem anymore by pulling the keyboard out.
I don't quite understand how this prevents the flood of INTERR 
interrupts
but it seems to work.

I assume there are nasty tentacles in USB land which I'm unfamiliar 
with.
Is there any reason this could be a bad idea?
   
   Stefan that's a really good analysis.  I think the diff might not be
   completely correct though.
   
   So basically you're removing the transfer from the active list.  That's
   generally done after the USB callback has been executed, in your case in
   ehci_device_intr_done().
   
   For interrupt transfers (pipe-repeat is 1) the transfer is kept on the
   list while the descriptors are freed/reallocated.  That should be safe
   since we should be reusing the sames.  
   
   So I don't know if we are missing a spl protection of if there's an xfer 
   leak but I'm afraid that with your diff usb_transfer_complete() might
   not be called for the failing xfer.
   
   That's easy to check, look if the ehcixfer pool counter increase when
   you detach your device.
   
   I'm afraid I cannot help more as I am currently traveling :)
  
  Here's an alternative diff that removes xfers from the list of
  currently pending transfers as soon as they are finished or
  cancelled.  This should also prevent the race you analysed.
  
  With this diff ehci_idone() should no longer be called on a
  CANCELLED or TIMEOUT xfer which mean the 'isdone' check can
  be removed.
  
  Does it work for you?
 
 It does, yes. Thanks.
 Your diff also stops the vomit of ehci_idone messages for me when
 I unplug devices or hubs.

Does that mean you're ok with the diff?

 There are other issues which are not fixed by this diff and which
 my diff did not fix either.
 
 Sometimes my USB keyboard seems to go insane and keeps pressing a key
 repeatedly even though I'm not pressing it. For example, when I typed
   make install
 today the keyboard kept repeating the letter l so the line looked
 like

Re: fix stuck cancelled ehci xfers (ehci_idone: ex=%p is done!)

2015-02-21 Thread Martin Pieuchot
On 20/02/15(Fri) 23:15, Stefan Sperling wrote:
 In the following configuration I can pretty easily trigger endless
 screenfulls of scrolling ehci_idone: ex=%p is done! messages,
 where %p is to a constant pointer value (same in each message).
 
 [ehci host]--[usb extension cable (hub 1)]--[usb keyboard (hub 2)]--[usb 
 mouse]
 
 So there's a USB extension cable (hub 1) that I plug a keyboard into.
 The keyboard in turn has a built-in hub that has a mouse plugged into it.
 The problem happens almost every time I pull the keyboard out of hub 1.
 
 I've also seen it happen without any intervention on my part (In fact
 I was in a different city and couldn't use the box until I got home
 several hours later and hit the reset switch... that's no fun)
 
 So if a transfer is cancelled (e.g. as a result of pulling the plug), then:
 
 - usbd_abort_pipe wants to abort a related transfer
 - ehci_abort_xfer schedules and waits for ehci_softintr, expecting
   the softintr routine to deal with the cancelled transfer:
 
   /*
* Step 3: Make sure the soft interrupt routine has run. This
* should remove any completed items off the queue.
* The hardware has no reference to completed items (TDs).
* It's safe to remove them at any time.
*/
   s = splusb();
   sc-sc_softwake = 1;
   usb_schedsoftintr(sc-sc_bus);
   tsleep(sc-sc_softwake, PZERO, ehciab, 0);
 
 - ehci_softintr gets scheduled
 - ehci_softintr loops over xfers on the sc_intrhead TAILQ and
   invokes ehci_check_intr on each
 - ehci_check_intr eventually ends up calling ehci_idone
 - ehci_idone does nothing for cancelled transfers... ?!?
 
   if (xfer-status == USBD_CANCELLED ||
   xfer-status == USBD_TIMEOUT) {
   DPRINTF((ehci_idone: aborted xfer=%p\n, xfer));
   return;
   }
 
 - something else happens
 
 - ehci_abort_xfer awakes from tsleep and sets ex-isdone, since it
   expects the softinterrupt routine to have dealt with the xfer
 
 - something else happens
 
 - the host controller sends an INTERR interrupt
 - ehci_intr1 schedules ehci_softintr
 - ehci_softintr loops over xfers on the sc_intrhead TAILQ and
   invokes ehci_check_intr on each
 - the cancelled xfer is still in the intrhead TAILQ and ends up in ehci_idone
 - ehci_idone looks for the isdone flag which is now set, then it
   complains and does nothing
 
 - the host controller sends an INTERR interrupt
 ... same story again, we get an endless loop
 
 This diff breaks the chain of events and fixes the endless loop for me.
 I can't reproduce the problem anymore by pulling the keyboard out.
 I don't quite understand how this prevents the flood of INTERR interrupts
 but it seems to work.
 
 I assume there are nasty tentacles in USB land which I'm unfamiliar with.
 Is there any reason this could be a bad idea?

Stefan that's a really good analysis.  I think the diff might not be
completely correct though.

So basically you're removing the transfer from the active list.  That's
generally done after the USB callback has been executed, in your case in
ehci_device_intr_done().

For interrupt transfers (pipe-repeat is 1) the transfer is kept on the
list while the descriptors are freed/reallocated.  That should be safe
since we should be reusing the same ones.

So I don't know if we are missing a spl protection or if there's an xfer
leak but I'm afraid that with your diff usb_transfer_complete() might
not be called for the failing xfer.

That's easy to check, look if the ehcixfer pool counter increase when
you detach your device.

I'm afraid I cannot help more as I am currently traveling :)

 
 Index: ehci.c
 ===
 RCS file: /cvs/src/sys/dev/usb/ehci.c,v
 retrieving revision 1.174
 diff -u -p -r1.174 ehci.c
 --- ehci.c9 Feb 2015 22:14:43 -   1.174
 +++ ehci.c20 Feb 2015 21:32:40 -
 @@ -811,6 +811,7 @@ done:
  void
  ehci_idone(struct usbd_xfer *xfer)
  {
 + struct ehci_softc *sc = (struct ehci_softc *)xfer-device-bus;
   struct ehci_xfer *ex = (struct ehci_xfer *)xfer;
  #ifdef EHCI_DEBUG
   struct ehci_pipe *epipe = (struct ehci_pipe *)xfer-pipe;
 @@ -839,6 +840,8 @@ ehci_idone(struct usbd_xfer *xfer)
  #endif
   if (xfer-status == USBD_CANCELLED ||
   xfer-status == USBD_TIMEOUT) {
 + if (ehci_active_intr_list(ex))
 + ehci_del_intr_list(sc, ex);
   DPRINTF((ehci_idone: aborted xfer=%p\n, xfer));
   return;
   }
 



Re: fix stuck cancelled ehci xfers (ehci_idone: ex=%p is done!)

2015-02-25 Thread Martin Pieuchot
On 22/02/15(Sun) 00:54, Martin Pieuchot wrote:
 On 20/02/15(Fri) 23:15, Stefan Sperling wrote:
  In the following configuration I can pretty easily trigger endless
  screenfulls of scrolling ehci_idone: ex=%p is done! messages,
  where %p is to a constant pointer value (same in each message).
  
  [ehci host]--[usb extension cable (hub 1)]--[usb keyboard (hub 2)]--[usb 
  mouse]
  
  So there's a USB extension cable (hub 1) that I plug a keyboard into.
  The keyboard in turn has a built-in hub that has a mouse plugged into it.
  The problem happens almost every time I pull the keyboard out of hub 1.
  
  I've also seen it happen without any intervention on my part (In fact
  I was in a different city and couldn't use the box until I got home
  several hours later and hit the reset switch... that's no fun)
  
  So if a transfer is cancelled (e.g. as a result of pulling the plug), then:
  
  - usbd_abort_pipe wants to abort a related transfer
  - ehci_abort_xfer schedules and waits for ehci_softintr, expecting
the softintr routine to deal with the cancelled transfer:
  
  /*
   * Step 3: Make sure the soft interrupt routine has run. This
   * should remove any completed items off the queue.
   * The hardware has no reference to completed items (TDs).
   * It's safe to remove them at any time.
   */
  s = splusb();
  sc-sc_softwake = 1;
  usb_schedsoftintr(sc-sc_bus);
  tsleep(sc-sc_softwake, PZERO, ehciab, 0);
  
  - ehci_softintr gets scheduled
  - ehci_softintr loops over xfers on the sc_intrhead TAILQ and
invokes ehci_check_intr on each
  - ehci_check_intr eventually ends up calling ehci_idone
  - ehci_idone does nothing for cancelled transfers... ?!?
  
  if (xfer-status == USBD_CANCELLED ||
  xfer-status == USBD_TIMEOUT) {
  DPRINTF((ehci_idone: aborted xfer=%p\n, xfer));
  return;
  }
  
  - something else happens
  
  - ehci_abort_xfer awakes from tsleep and sets ex-isdone, since it
expects the softinterrupt routine to have dealt with the xfer
  
  - something else happens
  
  - the host controller sends an INTERR interrupt
  - ehci_intr1 schedules ehci_softintr
  - ehci_softintr loops over xfers on the sc_intrhead TAILQ and
invokes ehci_check_intr on each
  - the cancelled xfer is still in the intrhead TAILQ and ends up in 
  ehci_idone
  - ehci_idone looks for the isdone flag which is now set, then it
complains and does nothing
  
  - the host controller sends an INTERR interrupt
  ... same story again, we get an endless loop
  
  This diff breaks the chain of events and fixes the endless loop for me.
  I can't reproduce the problem anymore by pulling the keyboard out.
  I don't quite understand how this prevents the flood of INTERR interrupts
  but it seems to work.
  
  I assume there are nasty tentacles in USB land which I'm unfamiliar with.
  Is there any reason this could be a bad idea?
 
 Stefan that's a really good analysis.  I think the diff might not be
 completely correct though.
 
 So basically you're removing the transfer from the active list.  That's
 generally done after the USB callback has been executed, in your case in
 ehci_device_intr_done().
 
 For interrupt transfers (pipe-repeat is 1) the transfer is kept on the
 list while the descriptors are freed/reallocated.  That should be safe
 since we should be reusing the sames.  
 
 So I don't know if we are missing a spl protection of if there's an xfer 
 leak but I'm afraid that with your diff usb_transfer_complete() might
 not be called for the failing xfer.
 
 That's easy to check, look if the ehcixfer pool counter increase when
 you detach your device.
 
 I'm afraid I cannot help more as I am currently traveling :)

Here's an alternative diff that removes xfers from the list of
currently pending transfers as soon as they are finished or
cancelled.  This should also prevent the race you analysed.

With this diff ehci_idone() should no longer be called on a
CANCELLED or TIMEOUT xfer which means the 'isdone' check can
be removed.

Does it work for you?

Index: ehci.c
===
RCS file: /cvs/src/sys/dev/usb/ehci.c,v
retrieving revision 1.174
diff -u -p -r1.174 ehci.c
--- ehci.c  9 Feb 2015 22:14:43 -   1.174
+++ ehci.c  25 Feb 2015 06:04:49 -
@@ -206,11 +206,7 @@ void   ehci_dump_exfer(struct ehci_xfer *
 #define ehci_add_intr_list(sc, ex) \
TAILQ_INSERT_TAIL((sc)-sc_intrhead, (ex), inext);
 #define ehci_del_intr_list(sc, ex) \
-   do { \
-   TAILQ_REMOVE(sc-sc_intrhead, (ex), inext); \
-   (ex)-inext.tqe_prev = NULL; \
-   } while (0)
-#define ehci_active_intr_list(ex) ((ex)-inext.tqe_prev != NULL)
+   TAILQ_REMOVE((sc)-sc_intrhead, (ex), inext);
 
 struct usbd_bus_methods ehci_bus_methods = {
.open_pipe = ehci_open,
@@ -753,6 +749,7 @@ ehci_check_qh_intr(struct ehci_softc *sc
}
  done

Re: Small ifconfig output tweak for inet6?

2015-03-26 Thread Martin Pieuchot
On 26/03/15(Thu) 17:39, Florian Obser wrote:
 On Thu, Mar 26, 2015 at 01:48:03PM +0100, Martin Pieuchot wrote:
  How do people feel about printing the prefixlen in CIDR notation?  I'm
  annoyed about outputs not fitting in 80 chars when using autoconf magic:
  
  -inet6 fd00::f2de:f1ff:fe6a:15d1 prefixlen 64 autoconf pltime 3594 vltime 
  7194
  +inet6 fd00::f2de:f1ff:fe6a:15d1/64 autoconf pltime 3594 vltime 7194
 
 for real prefixes this still doesn't fit:
 inet6 :BBB::1:5054:ff:fedc:6fcd/64 autoconf pltime 604776 
 vltime 2591976
 inet6 :BBB::1:e02b:adec:a4ce:f04d/64 autoconf privacy pltime 
 85586 vltime 604173

What are you suggesting?  To not print 'autoconf'?  Use less left
margin?  Move the 80char limit to 100?  That the CIDR notation is
not worth it?



Re: Small ifconfig output tweak for inet6?

2015-03-26 Thread Martin Pieuchot
On 26/03/15(Thu) 17:35, Florian Obser wrote:
 On Thu, Mar 26, 2015 at 05:46:12PM +0100, Henning Brauer wrote:
  * Mike Belopuhov m...@belopuhov.com [2015-03-26 14:36]:
   however I agree that if we do this for ipv6 we should do it for ipv4 as 
   well
   but then do we care about tons of stuff out there parsing ifconfig output?
  
  that's the prime question. I would love to move to CIDR notation - are
  we breaking people's scripts with that? The inet side has been the same
  for, what, decades?
 
 Of course this breaks stuff :)

 Diff at the end (for those interested) shows in which way it breaks
 ansible - which I understand the cool kids use these days...
 Note to self: ansible should care about pltime...
 
 We could port libxo.
 /me runs away giggling like an idiot

Well just use inet_net_pton(3) and call it a day.  You see, I'm trying
to help people writing parsers ;)



ehci(4) Full-speed isochronous transfers support

2015-03-28 Thread Martin Pieuchot
With the increasing number of machines shipping with rate-matching
hubs instead of companion controllers to support USB Full and Low-
speed devices, a number of people asked me if it was possible to
add support for Full-speed isochronous transfers in order to use
USB1.1 uaudio(4) devices with ehci(4)-only systems.

The diff below does that.  It also contains some cleanups for the
High-speed isochronous code and plugs some memory leaks for free.

Please let me know how it goes with 1.1 and 2.0 devices.

Index: ehci.c
===
RCS file: /cvs/src/sys/dev/usb/ehci.c,v
retrieving revision 1.182
diff -u -p -r1.182 ehci.c
--- ehci.c  25 Mar 2015 13:23:05 -  1.182
+++ ehci.c  28 Mar 2015 10:23:48 -
@@ -106,6 +106,7 @@ voidehci_check_intr(struct ehci_softc 
 void   ehci_check_qh_intr(struct ehci_softc *, struct usbd_xfer *);
 void   ehci_check_itd_intr(struct ehci_softc *, struct usbd_xfer *);
 void   ehci_idone(struct usbd_xfer *);
+void   ehci_isoc_idone(struct usbd_xfer *);
 void   ehci_timeout(void *);
 void   ehci_timeout_task(void *);
 void   ehci_intrlist_timeout(void *);
@@ -163,10 +164,12 @@ usbd_status   ehci_alloc_sqtd_chain(struct
struct usbd_xfer *, struct ehci_soft_qtd **, struct 
ehci_soft_qtd **);
 void   ehci_free_sqtd_chain(struct ehci_softc *, struct ehci_xfer *);
 
-struct ehci_soft_itd *ehci_alloc_itd(struct ehci_softc *sc);
-void   ehci_free_itd(struct ehci_softc *sc, struct ehci_soft_itd *itd);
-void   ehci_rem_free_itd_chain(struct ehci_softc *sc,
-   struct ehci_xfer *);
+struct ehci_soft_itd *ehci_alloc_itd(struct ehci_softc *);
+void   ehci_free_itd(struct ehci_softc *, struct ehci_soft_itd *);
+void   ehci_rem_itd_chain(struct ehci_softc *, struct ehci_xfer *);
+void   ehci_free_itd_chain(struct ehci_softc *, struct ehci_xfer *);
+intehci_alloc_itd_chain(struct ehci_softc *, struct usbd_xfer *);
+intehci_alloc_sitd_chain(struct ehci_softc *, struct usbd_xfer *);
 void   ehci_abort_isoc_xfer(struct usbd_xfer *xfer,
usbd_status status);
 
@@ -191,7 +194,6 @@ voidehci_dump_sqtd(struct ehci_soft_qt
 void   ehci_dump_qtd(struct ehci_qtd *);
 void   ehci_dump_sqh(struct ehci_soft_qh *);
 #if notyet
-void   ehci_dump_sitd(struct ehci_soft_itd *itd);
 void   ehci_dump_itd(struct ehci_soft_itd *);
 #endif
 #ifdef DIAGNOSTIC
@@ -353,8 +355,10 @@ ehci_init(struct ehci_softc *sc)
 
sc-sc_softitds = mallocarray(sc-sc_flsize,
sizeof(struct ehci_soft_itd *), M_USB, M_NOWAIT | M_ZERO);
-   if (sc-sc_softitds == NULL)
+   if (sc-sc_softitds == NULL) {
+   usb_freemem(sc-sc_bus, sc-sc_fldma);
return (ENOMEM);
+   }
LIST_INIT(sc-sc_freeitds);
TAILQ_INIT(sc-sc_intrhead);
 
@@ -469,6 +473,7 @@ ehci_init(struct ehci_softc *sc)
ehci_free_sqh(sc, sc-sc_async_head);
 #endif
  bad1:
+   free(sc-sc_softitds, M_USB, sc-sc_flsize);
usb_freemem(sc-sc_bus, sc-sc_fldma);
return (err);
 }
@@ -650,49 +655,36 @@ ehci_softintr(void *v)
sc-sc_bus.intr_context--;
 }
 
-/* Check for an interrupt. */
 void
 ehci_check_intr(struct ehci_softc *sc, struct usbd_xfer *xfer)
 {
-   int attr;
+   int attr = xfer-pipe-endpoint-edesc-bmAttributes;
 
-   attr = xfer-pipe-endpoint-edesc-bmAttributes;
if (UE_GET_XFERTYPE(attr) == UE_ISOCHRONOUS)
ehci_check_itd_intr(sc, xfer);
else
ehci_check_qh_intr(sc, xfer);
-
-   return;
 }
 
 void
 ehci_check_qh_intr(struct ehci_softc *sc, struct usbd_xfer *xfer)
 {
struct ehci_xfer *ex = (struct ehci_xfer *)xfer;
-   struct ehci_soft_qtd *sqtd, *lsqtd;
+   struct ehci_soft_qtd *sqtd, *lsqtd = ex-sqtdend;
uint32_t status;
 
-   if (ex-sqtdstart == NULL) {
-   printf(ehci_check_qh_intr: not valid sqtd\n);
-   return;
-   }
+   KASSERT(ex-sqtdstart != NULL  ex-sqtdend != NULL);
+
+   usb_syncmem(lsqtd-dma,
+   lsqtd-offs + offsetof(struct ehci_qtd, qtd_status),
+   sizeof(lsqtd-qtd.qtd_status),
+   BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD);
 
-   lsqtd = ex-sqtdend;
-#ifdef DIAGNOSTIC
-   if (lsqtd == NULL) {
-   printf(ehci_check_qh_intr: lsqtd==0\n);
-   return;
-   }
-#endif
/*
 * If the last TD is still active we need to check whether there
 * is a an error somewhere in the middle, or whether there was a
 * short packet (SPD and not ACTIVE).
 */
-   usb_syncmem(lsqtd-dma,
-   lsqtd-offs + offsetof(struct ehci_qtd, qtd_status),
-   sizeof(lsqtd-qtd.qtd_status),
-   BUS_DMASYNC_POSTWRITE | 

Re: elantech-v4 clickpad support

2015-01-29 Thread Martin Pieuchot
On 30/01/15(Fri) 01:25, Ulf Brosziewski wrote:
 Probably I was too sceptical about synaptics.c. The bug I observed
 with the ALPS touchpad seems to be due to a kind of mismatch between
 the ALPS code in pms and the event handling in wsconscomm. The patch
 below contains the initial change as well as what was necessary to
 fix this.

Do you think it is possible to fix the pms(4) driver instead of adding
another quirk?

 
 diff --git a/wsconscomm.c b/wsconscomm.c
 index df3512d..9c5afe7 100644
 --- a/wsconscomm.c
 +++ b/wsconscomm.c
 @@ -132,12 +132,6 @@ WSConsReadHwState(InputInfoPtr pInfo,
  struct wscons_event event;
  Bool v;
 
 -/* Reset cumulative values if buttons were not previously pressed */
 -if (!hw-left  !hw-right  !hw-middle) {
 -hw-cumulative_dx = hw-x;
 -hw-cumulative_dy = hw-y;
 -}
 -
  while (WSConsReadEvent(pInfo, event)) {
  switch (event.type) {
  case WSCONS_EVENT_MOUSE_UP:
 @@ -187,9 +181,11 @@ WSConsReadHwState(InputInfoPtr pInfo,
  break;
  case WSCONS_EVENT_MOUSE_ABSOLUTE_X:
  hw-x = event.value;
 +hw-cumulative_dx = hw-x;
  break;
  case WSCONS_EVENT_MOUSE_ABSOLUTE_Y:
  hw-y = priv-maxy - event.value + priv-miny;
 +hw-cumulative_dy = hw-y;
  break;
  case WSCONS_EVENT_MOUSE_ABSOLUTE_Z:
  hw-z = event.value;
 @@ -204,8 +200,14 @@ WSConsReadHwState(InputInfoPtr pInfo,
  /* XXX magic number mapping which is mirrored in pms driver */
  switch (event.value) {
  case 0:
 -hw-fingerWidth = 5;
 -hw-numFingers = 2;
 +if (priv-model != MODEL_ALPS) {
 +hw-fingerWidth = 5;
 +hw-numFingers = 2;
 +} else {
 +/* For ALPS models pms reports that w is 0 if (z = 0)? 
 */
 +hw-fingerWidth = 0;
 +hw-numFingers = 0;
 +}
  break;
  case 1:
  hw-fingerWidth = 5;
 



Re: [NEW] Driver for the Araneus Alea II USB TRNG

2015-04-14 Thread Martin Pieuchot
On 14/04/15(Tue) 07:40, attila wrote:
 [...]
 Feedback most welcome.

See below.

 /* -*- mode:c; tab-width:8; indent-tabs-mode:t; c-basic-offset:8 -*- */

We do not include editor settings in files, the first line should
contain:

/*  $OpenBSD$ */

Which will be expanded by CVS.

 /*
  * Copyright (c) 2006 Alexander Yurchenko gra...@openbsd.org
  * Copyright (c) 2007 Marc Balmer mbal...@openbsd.org
  * Copyright (C) 2015 attila att...@stalphonsos.com
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED AS IS AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
 /*
  * Alea II TRNG.  Produces 100kbit/sec of entropy by black magic
  *
  * Product information in English can be found here:
  * http://www.araneus.fi/products/alea2/en/
  *
  * I only have an Alea II to play with but the documentation says
  * that the Alea I is the same, so they should also work.
  *
  * I cribbed liberally from both the uow and umbg drivers, both of
  * which are similar to this situation in different ways.

The two last paragraphs do not add much information, think about what will
help you when you'll have to read this code again in a couple of years
:)

  */
 
 #include sys/param.h
 #include sys/systm.h
 #include sys/device.h
 #include sys/kernel.h
 #include sys/time.h

I believe you need sys/time.h just to make sure your driver works as
expected.  You kept this code as #ifdef ALEA_DEBUG but does it really
help to debug something?  Do you think it's worth keeping this code?
We try to not add too much verbosity to driver code.

 #include sys/timeout.h
 
 #include dev/usb/usb.h
 #include dev/usb/usbdevs.h
 #include dev/usb/usbdi.h
 #include dev/usb/usbdi_util.h
 
 #include dev/rndvar.h
 
 #define ALEA_IFACE0
 #define ALEA_MSECS10

How did you choose 10msec?  

 #define ALEA_READ_TOUT1100

Compared to a transfer timeout of 1.1 seconds it seems very short.  By
the way we generally spell timeout TIMEOUT :o)

 #define ALEA_BUFSIZ   ((1024/8)*100)  /* 100 kbits */
 /*#define ALEA_DEBUG  1*/ /* comment out */
 
 #define OURNAME(x)x-sc_dev.dv_xname

We generally use DEVNAME(), look at how it is defined :) 

 
 struct ualea_softc {
   struct  device sc_dev;
   struct  usbd_device *sc_udev;
   struct  usbd_interface *sc_iface;
   struct  usbd_pipe *sc_ibulk;
   struct  timeout sc_tout;
   struct  usb_task sc_task;
 #ifdef ALEA_DEBUG
   struct  timespec sc_tattach;
   u_int32_t sc_nbits;
 #endif
 };
 
 int ualea_match(struct device *, void *, void *);
 void ualea_attach(struct device *, struct device *, void *);
 int ualea_detach(struct device *, int);
 void ualea_task(void *);
 void ualea_intr(void *);
 
 struct cfdriver ualea_cd = {
   NULL, ualea, DV_DULL
 };
 
 const struct cfattach ualea_ca = {
   sizeof(struct ualea_softc),
   ualea_match,
   ualea_attach,
   ualea_detach
 };
 
 static const struct usb_devno ualea_devs[] = {
   { USB_VENDOR_ARANEUS,   USB_PRODUCT_ARANEUS_ALEA }
 };

Is it possible to match your device based on the content of the device
descriptor instead of whitelisting IDs?  Whitelisting means that if the
company produces a compatible device with a new ID we'll need to modify
the driver.

 int
 ualea_match(struct device *parent, void *match, void *aux)
 {
   struct usb_attach_arg *uaa = aux;
 
   if (uaa-iface != NULL)
   return (UMATCH_NONE);

This line means that you're waiting for the USB stack to set the
first valid configuration for you, so you don't need most of the
code in *_attach().


 #ifdef ALEA_DEBUG
   if (uaa-vendor == USB_VENDOR_ARANEUS)
   printf(ualea: vendor 0x%x (%d) (ARANEUS) product 0x%x (%d)\n,
   uaa-vendor, uaa-vendor, uaa-product, uaa-product);
 #endif

Please kill this debug chunk, you have the same information with
usbdevs(8).

   return ((usb_lookup(ualea_devs, uaa-vendor, uaa-product) != NULL) ?
   UMATCH_VENDOR_PRODUCT : UMATCH_NONE);
 }
 
 void
 ualea_attach(struct device *parent, struct device *self, void *aux)
 {
   struct ualea_softc *sc = (struct ualea_softc *)self;
   struct usb_attach_arg *uaa = aux;
   usb_interface_descriptor_t *id;
   usb_endpoint_descriptor_t *ed;
   int ep_ibulk = -1;
   usbd_status error;
   int i;
 
   sc-sc_udev = uaa-device;
   error = 

Re: Change the way we handle interface/connected networks

2015-04-23 Thread Martin Pieuchot
On 15/04/15(Wed) 23:00, Claudio Jeker wrote:
 On Wed, Mar 18, 2015 at 05:46:34AM +0100, Claudio Jeker wrote:
  On Tue, Mar 17, 2015 at 05:35:21PM +0100, Martin Pieuchot wrote:
   On 12/02/15(Thu) 12:35, Martin Pieuchot wrote:
On 10/02/15(Tue) 03:04, Claudio Jeker wrote:
 There is no need to not allow the same network to be configured more 
 then
 once. Instead just rely on the multipath and priority handling of the
 routing table to select the right route.
 Additionally this removes cloned routes (arp/npd cache) when the 
 interface
 goes down or when the any of the multipath cloning route is changed.
 
 With this it is possible to run 2 dhclients on wired and wireless 
 with a
 bridged network. Active TCP sessions still fail when the cable is
 unplugged. To fix this more is needed.
 
 This changes a fundamental part of the network stack and therefor 
 broad
 testing is needed to find all the hidden dragons.

  It is broken for IPv6 and I could not find the proper fix yet. I think I
  know why it goes wrong but the nd6 code is a nightmare.
  
  I will send out a new diff once I have IPv6 fixed.
   
 
 Unsurprisingly IPv6 needs to be special and is not using rt_ifa_add or
 rt_ifa_del in all cases. 

Not yet!  That would be nice to convert them :)

  There are three special cases that do the same
 dance but use ifa-ifa_addr as the gateway and because of this the
 resulting interface routes are not caught by the nd6 code (RTF_LLINFO is
 missing). When the routes are then cloned nd6 is not invoked and
 everything points back to the host. Oops.

I still think we need to use ifa->ifa_addr as gateway in some cases,
see below.

 The following updated diff seems to fix this but I only minimally tested
 the IPv6 part. People using IPv6 may want to give this a spin.

Same here.  I mostly played with dhclient with 3 interfaces and apart
from the fact that only interfaces with different priorities will get a
cloning route, it works fine.  But that can be improved later.

 IMO the net/if_var.h and netinet/ip_carp.c changes could be commited
 before the rest since there should be no noticeable change in how carp
 works.

I agree.

I have some comments inline:

 @@ -1106,16 +1117,20 @@ rt_ifa_add(struct ifaddr *ifa, int flags
  {
   struct rtentry  *rt, *nrt = NULL;
   struct sockaddr_rtlabel  sa_rl;
 + struct sockaddr_dl   sa_dl = { sizeof(sa_dl), AF_LINK };
   struct rt_addrinfo   info;
   u_short  rtableid = ifa-ifa_ifp-if_rdomain;
 - u_int8_t prio = RTP_CONNECTED;
 + u_int8_t prio = ifa-ifa_ifp-if_priority + RTP_STATIC;
   int  error;
  
 + sa_dl.sdl_type = ifa-ifa_ifp-if_type;
 + sa_dl.sdl_index = ifa-ifa_ifp-if_index;

I do not like having more sdl stuff here but I don't see a simpler way
to do that right now since cloning routes need a sockaddr_dl with the
right ifp index but an empty address.

I considered using ifp->if_sadl instead but that means we need to clear
the link-layer address somewhere and this wouldn't help if we change it
after adding a local or cloning route.

IMHO we cannot fix this without improving the RTM_RESOLVE logic. See my
comment below.  I think we should be able to call ifa_rtrequest() before
adding the rtentry to the routing table, this would prevent another layer
violation and properly bail if the ARP or ND information is incorrect.

   memset(info, 0, sizeof(info));
   info.rti_ifa = ifa;
 - info.rti_flags = flags;
 + info.rti_flags = flags | RTF_MPATH;
   info.rti_info[RTAX_DST] = dst;
 - info.rti_info[RTAX_GATEWAY] = ifa-ifa_addr;
 + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sa_dl;

Here I strongly believe that we should only use a sockaddr_dl as gateway
if we are creating a RTF_CLONING or RTF_LLINFO route.

The first reason is that p2p interfaces generally add a route like:

dstaddr localaddr   UH  0   0   -   pppoe0

And I'm afraid using a sockaddr_dl might break things.

The second reason is that having sockaddr_dl in the gateway is tied to
the RTM_RESOLVE mechanism related to ifa_rtrequest, which does not apply
to IFF_LOOPBACK or IFF_POINTOPOINT interfaces.


 Index: netinet/if_ether.c
 ===
 RCS file: /cvs/src/sys/netinet/if_ether.c,v
 retrieving revision 1.150
 diff -u -p -r1.150 if_ether.c
 --- netinet/if_ether.c10 Apr 2015 13:58:20 -  1.150
 +++ netinet/if_ether.c12 Apr 2015 11:47:03 -
 @@ -121,8 +121,6 @@ void  db_print_llinfo(caddr_t);
  int  db_show_radix_node(struct radix_node *, void *, u_int);
  #endif
  
 -static const struct sockaddr_dl null_sdl = { sizeof(null_sdl), AF_LINK };
 -
  /*
   * Timeout routine.  Age arp_tab entries periodically.
   */
 @@ -190,14 +188,6 @@ arp_rtrequest(int req, struct

Re: [PATCH] Disable USB bus probes

2015-04-22 Thread Martin Pieuchot
On 17/04/15(Fri) 16:47, Dimitris Papastamos wrote:
 Hi,
 
 This patch adds an option to usbdevs(8) to disable USB bus probing
 at runtime.  The operation is restricted to the root user.

It would be nice to show if probing is on or off, for example

# usbdevs -p
bus probing: on

# usbdevs -p off

But other people might have better suggestions.

 I am not sure if this approach is sensible or even correct.  Some
 pointers would be much appreciated.

Setting a variable per hub (and here roothub) is overkill, a global
would be enough.

 
 This was started as part of a reply by mpi on tech@
 
   http://marc.info/?l=openbsd-techm=142917883126679w=2
 
 I guess the reasoning behind this is to add some protection against
 things like badusb?

It can have multiple usages :)

How did you test it?  What happens if you plug in a hub with multiple
devices, turn bus probing off, then detach the hub?

What happens if you plug in a device like a phone that uses the power to
charge its battery after turning probing off?  If I read your diff
correctly you still allow the device to be charged, which is fine.  Did
you try that?  I think it's worth a documentation note.

   if (!dev-self_powered  dev-powersrc-parent != NULL 
   !dev-powersrc-parent-self_powered) {
 @@ -494,6 +495,9 @@ uhub_explore(struct usbd_device *dev)
*/
   if (speed  sc-sc_hub-speed)
   speed = sc-sc_hub-speed;
 +
 + if (dev-hub-noprobe)
 + return (0);

I believe you can move that before the "Figure out device speed" comment.



carp(4) ether_input_mbuf() take 2

2015-04-21 Thread Martin Pieuchot
Here's a new diff to convert carp_input() to use ether_input_mbuf()
instead of ether_input().  This is a necessary step to be able to
convert other pseudo-drivers to if_input() but it is just needed for
the transition.

This version only calls M_PREPEND(9) on mbufs that will be passed to
ether_input_mbuf(9).  This fixes the cases where vlan_input() returns
1 and ether_input() continues its processing believing the mbuf hasn't
been modified.

This version also returns 0 in case M_PREPEND(9) fails, preventing a
use after free.

Does it work for you, ok?

Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.252
diff -u -p -r1.252 ip_carp.c
--- netinet/ip_carp.c   21 Apr 2015 09:35:32 -  1.252
+++ netinet/ip_carp.c   21 Apr 2015 10:22:10 -
@@ -1416,15 +1416,15 @@ carp_our_mcastaddr(struct ifnet *ifp, u_
 int
 carp_input(struct ifnet *ifp0, struct ether_header *eh0, struct mbuf *m)
 {
-   struct ether_header eh;
+   struct ether_header *eh;
struct carp_if *cif = (struct carp_if *)ifp0-if_carp;
struct ifnet *ifp;
 
-   memcpy(eh, eh0, sizeof(eh));
+   ifp = carp_ourether(cif, eh0-ether_dhost);
+   if (ifp == NULL  (m-m_flags  (M_BCAST|M_MCAST)) == 0)
+   return (1);
 
-   if ((ifp = carp_ourether(cif, eh0-ether_dhost)))
-   ;
-   else if (m-m_flags  (M_BCAST|M_MCAST)) {
+   if (ifp == NULL) {
struct carp_softc *vh;
struct mbuf *m0;
 
@@ -1438,30 +1438,39 @@ carp_input(struct ifnet *ifp0, struct et
m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
if (m0 == NULL)
continue;
+   M_PREPEND(m0, sizeof(*eh), M_DONTWAIT);
+   if (m0 == NULL)
+   continue;
+   eh = mtod(m0, struct ether_header *);
+   memmove(eh, eh0, sizeof(*eh));
+
m0-m_pkthdr.rcvif = vh-sc_if;
 #if NBPFILTER  0
if (vh-sc_if.if_bpf)
-   bpf_mtap_hdr(vh-sc_if.if_bpf, (char *)eh,
-   ETHER_HDR_LEN, m0, BPF_DIRECTION_IN, NULL);
+   bpf_mtap_ether(vh-sc_if.if_bpf, m0,
+   BPF_DIRECTION_IN);
 #endif
vh-sc_if.if_ipackets++;
-   ether_input(m0, eh);
+   ether_input_mbuf(vh-sc_if, m0);
}
+
return (1);
}
 
-   if (ifp == NULL)
-   return (1);
+   M_PREPEND(m, sizeof(*eh), M_DONTWAIT);
+   if (m == NULL)
+   return (0);
+   eh = mtod(m, struct ether_header *);
+   memmove(eh, eh0, sizeof(*eh));
 
m-m_pkthdr.rcvif = ifp;
 
 #if NBPFILTER  0
if (ifp-if_bpf)
-   bpf_mtap_hdr(ifp-if_bpf, (char *)eh, ETHER_HDR_LEN, m,
-   BPF_DIRECTION_IN, NULL);
+   bpf_mtap_ether(ifp-if_bpf, m, BPF_DIRECTION_IN);
 #endif
ifp-if_ipackets++;
-   ether_input(m, eh);
+   ether_input_mbuf(ifp, m);
 
return (0);
 }



bridge(4) and ether_input_mbuf()

2015-04-21 Thread Martin Pieuchot
This diff adds the necessary glue to bridge(4) to be able to convert
other pseudo-drivers to if_input().  This will also help bridge(4)'s
own conversion.

Since bridge_input() already returns a mbuf, I believe this change is
less error prone than the carp(4) one.  But I appreciate reviews,
tests and oks.

Index: net/bridgestp.c
===
RCS file: /cvs/src/sys/net/bridgestp.c,v
retrieving revision 1.52
diff -u -p -r1.52 bridgestp.c
--- net/bridgestp.c 14 Mar 2015 03:38:51 -  1.52
+++ net/bridgestp.c 20 Apr 2015 11:35:56 -
@@ -596,6 +596,9 @@ bstp_input(struct bstp_state *bs, struct
len = ntohs(eh-ether_type);
if (len  sizeof(tpdu))
goto out;
+
+   m_adj(m, ETHER_HDR_LEN);
+
if (m-m_pkthdr.len  len)
m_adj(m, len - m-m_pkthdr.len);
if ((m = m_pullup(m, sizeof(tpdu))) == NULL)
Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.235
diff -u -p -r1.235 if_bridge.c
--- net/if_bridge.c 17 Apr 2015 11:04:01 -  1.235
+++ net/if_bridge.c 20 Apr 2015 11:55:38 -
@@ -115,10 +115,9 @@ void   bridge_broadcast(struct bridge_soft
 struct ether_header *, struct mbuf *);
 void   bridge_localbroadcast(struct bridge_softc *, struct ifnet *,
 struct ether_header *, struct mbuf *);
-void   bridge_span(struct bridge_softc *, struct ether_header *,
-struct mbuf *);
+void   bridge_span(struct bridge_softc *, struct mbuf *);
 struct mbuf *bridge_dispatch(struct bridge_iflist *, struct ifnet *,
-struct ether_header *, struct mbuf *);
+struct mbuf *);
 void   bridge_stop(struct bridge_softc *);
 void   bridge_init(struct bridge_softc *);
 intbridge_bifconf(struct bridge_softc *, struct ifbifconf *);
@@ -180,6 +179,7 @@ int
 bridge_clone_create(struct if_clone *ifc, int unit)
 {
struct bridge_softc *sc;
+   struct ifih *bridge_ifih;
struct ifnet *ifp;
int i, s;
 
@@ -187,8 +187,15 @@ bridge_clone_create(struct if_clone *ifc
if (!sc)
return (ENOMEM);
 
+   bridge_ifih = malloc(sizeof(*bridge_ifih), M_DEVBUF, M_NOWAIT);
+   if (bridge_ifih == NULL) {
+   free(sc, M_DEVBUF, 0);
+   return (ENOMEM);
+   }
+
sc-sc_stp = bstp_create(sc-sc_if);
if (!sc-sc_stp) {
+   free(bridge_ifih, M_DEVBUF, sizeof(*bridge_ifih));
free(sc, M_DEVBUF, 0);
return (ENOMEM);
}
@@ -222,6 +229,9 @@ bridge_clone_create(struct if_clone *ifc
DLT_EN10MB, ETHER_HDR_LEN);
 #endif
 
+   bridge_ifih-ifih_input = ether_input;
+   SLIST_INSERT_HEAD(ifp-if_inputs, bridge_ifih, ifih_next);
+
s = splnet();
LIST_INSERT_HEAD(bridge_list, sc, sc_list);
splx(s);
@@ -234,6 +244,7 @@ bridge_clone_destroy(struct ifnet *ifp)
 {
struct bridge_softc *sc = ifp-if_softc;
struct bridge_iflist *bif;
+   struct ifih *bridge_ifih;
int s;
 
bridge_stop(sc);
@@ -250,6 +261,17 @@ bridge_clone_destroy(struct ifnet *ifp)
splx(s);
 
bstp_destroy(sc-sc_stp);
+
+   /* Undo pseudo-driver changes. */
+   if_deactivate(ifp);
+
+   bridge_ifih = SLIST_FIRST(ifp-if_inputs);
+   SLIST_REMOVE_HEAD(ifp-if_inputs, ifih_next);
+
+   KASSERT(SLIST_EMPTY(ifp-if_inputs));
+
+   free(bridge_ifih, M_DEVBUF, sizeof(*bridge_ifih));
+
if_detach(ifp);
 
free(sc, M_DEVBUF, 0);
@@ -990,7 +1012,7 @@ bridge_output(struct ifnet *ifp, struct 
struct mbuf *mc;
int used = 0;
 
-   bridge_span(sc, NULL, m);
+   bridge_span(sc, m);
 
TAILQ_FOREACH(p, sc-sc_iflist, next) {
dst_if = p-ifp;
@@ -1066,7 +1088,7 @@ sendunicast:
(sa = bridge_tunneltag(m, dst_p-brt_tunnel.sa.sa_family)) != NULL)
memcpy(sa, dst_p-brt_tunnel.sa, dst_p-brt_tunnel.sa.sa_len);
 
-   bridge_span(sc, NULL, m);
+   bridge_span(sc, m);
if ((dst_if-if_flags  IFF_RUNNING) == 0) {
m_freem(m);
return (ENETDOWN);
@@ -1282,12 +1304,13 @@ bridgeintr_frame(struct bridge_softc *sc
  * not for us, and schedule an interrupt.
  */
 struct mbuf *
-bridge_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m)
+bridge_input(struct ifnet *ifp, struct ether_header *eh0, struct mbuf *m)
 {
struct bridge_softc *sc;
struct bridge_iflist *ifl;
+   struct ether_header *eh;
 #if NVLAN  0
-   uint16_t etype = ntohs(eh-ether_type);
+   uint16_t etype = ntohs(eh0-ether_type);
 #endif /* NVLAN  0 */
 
/*
@@ -1306,15 +1329,20 @@ bridge_input(struct ifnet *ifp, struct e
if ((sc-sc_if.if_flags  IFF_RUNNING) == 0)
return (m);
 
+   M_PREPEND(m, sizeof(*eh), 

Re: bridge(4) and ether_input_mbuf()

2015-04-28 Thread Martin Pieuchot
On 21/04/15(Tue) 12:35, Martin Pieuchot wrote:
 This diff adds the necessary glue to bridge(4) to be able to convert
 other pseudo-drivers to if_input().  This will also help bridge(4)'s
 own conversion.
 
 Since bridge_input() already returns a mbuf, I believe this change is
 less error prone than the carp(4) one.  But I appreciate reviews,
 tests and oks.

Anybody?

 
 Index: net/bridgestp.c
 ===
 RCS file: /cvs/src/sys/net/bridgestp.c,v
 retrieving revision 1.52
 diff -u -p -r1.52 bridgestp.c
 --- net/bridgestp.c   14 Mar 2015 03:38:51 -  1.52
 +++ net/bridgestp.c   20 Apr 2015 11:35:56 -
 @@ -596,6 +596,9 @@ bstp_input(struct bstp_state *bs, struct
   len = ntohs(eh-ether_type);
   if (len  sizeof(tpdu))
   goto out;
 +
 + m_adj(m, ETHER_HDR_LEN);
 +
   if (m-m_pkthdr.len  len)
   m_adj(m, len - m-m_pkthdr.len);
   if ((m = m_pullup(m, sizeof(tpdu))) == NULL)
 Index: net/if_bridge.c
 ===
 RCS file: /cvs/src/sys/net/if_bridge.c,v
 retrieving revision 1.235
 diff -u -p -r1.235 if_bridge.c
 --- net/if_bridge.c   17 Apr 2015 11:04:01 -  1.235
 +++ net/if_bridge.c   20 Apr 2015 11:55:38 -
 @@ -115,10 +115,9 @@ void bridge_broadcast(struct bridge_soft
  struct ether_header *, struct mbuf *);
  void bridge_localbroadcast(struct bridge_softc *, struct ifnet *,
  struct ether_header *, struct mbuf *);
 -void bridge_span(struct bridge_softc *, struct ether_header *,
 -struct mbuf *);
 +void bridge_span(struct bridge_softc *, struct mbuf *);
  struct mbuf *bridge_dispatch(struct bridge_iflist *, struct ifnet *,
 -struct ether_header *, struct mbuf *);
 +  struct mbuf *);
  void bridge_stop(struct bridge_softc *);
  void bridge_init(struct bridge_softc *);
  int  bridge_bifconf(struct bridge_softc *, struct ifbifconf *);
 @@ -180,6 +179,7 @@ int
  bridge_clone_create(struct if_clone *ifc, int unit)
  {
   struct bridge_softc *sc;
 + struct ifih *bridge_ifih;
   struct ifnet *ifp;
   int i, s;
  
 @@ -187,8 +187,15 @@ bridge_clone_create(struct if_clone *ifc
   if (!sc)
   return (ENOMEM);
  
 + bridge_ifih = malloc(sizeof(*bridge_ifih), M_DEVBUF, M_NOWAIT);
 + if (bridge_ifih == NULL) {
 + free(sc, M_DEVBUF, 0);
 + return (ENOMEM);
 + }
 +
   sc-sc_stp = bstp_create(sc-sc_if);
   if (!sc-sc_stp) {
 + free(bridge_ifih, M_DEVBUF, sizeof(*bridge_ifih));
   free(sc, M_DEVBUF, 0);
   return (ENOMEM);
   }
 @@ -222,6 +229,9 @@ bridge_clone_create(struct if_clone *ifc
   DLT_EN10MB, ETHER_HDR_LEN);
  #endif
  
 + bridge_ifih-ifih_input = ether_input;
 + SLIST_INSERT_HEAD(ifp-if_inputs, bridge_ifih, ifih_next);
 +
   s = splnet();
   LIST_INSERT_HEAD(bridge_list, sc, sc_list);
   splx(s);
 @@ -234,6 +244,7 @@ bridge_clone_destroy(struct ifnet *ifp)
  {
   struct bridge_softc *sc = ifp-if_softc;
   struct bridge_iflist *bif;
 + struct ifih *bridge_ifih;
   int s;
  
   bridge_stop(sc);
 @@ -250,6 +261,17 @@ bridge_clone_destroy(struct ifnet *ifp)
   splx(s);
  
   bstp_destroy(sc-sc_stp);
 +
 + /* Undo pseudo-driver changes. */
 + if_deactivate(ifp);
 +
 + bridge_ifih = SLIST_FIRST(ifp-if_inputs);
 + SLIST_REMOVE_HEAD(ifp-if_inputs, ifih_next);
 +
 + KASSERT(SLIST_EMPTY(ifp-if_inputs));
 +
 + free(bridge_ifih, M_DEVBUF, sizeof(*bridge_ifih));
 +
   if_detach(ifp);
  
   free(sc, M_DEVBUF, 0);
 @@ -990,7 +1012,7 @@ bridge_output(struct ifnet *ifp, struct 
   struct mbuf *mc;
   int used = 0;
  
 - bridge_span(sc, NULL, m);
 + bridge_span(sc, m);
  
   TAILQ_FOREACH(p, sc-sc_iflist, next) {
   dst_if = p-ifp;
 @@ -1066,7 +1088,7 @@ sendunicast:
   (sa = bridge_tunneltag(m, dst_p-brt_tunnel.sa.sa_family)) != NULL)
   memcpy(sa, dst_p-brt_tunnel.sa, dst_p-brt_tunnel.sa.sa_len);
  
 - bridge_span(sc, NULL, m);
 + bridge_span(sc, m);
   if ((dst_if-if_flags  IFF_RUNNING) == 0) {
   m_freem(m);
   return (ENETDOWN);
 @@ -1282,12 +1304,13 @@ bridgeintr_frame(struct bridge_softc *sc
   * not for us, and schedule an interrupt.
   */
  struct mbuf *
 -bridge_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m)
 +bridge_input(struct ifnet *ifp, struct ether_header *eh0, struct mbuf *m)
  {
   struct bridge_softc *sc;
   struct bridge_iflist *ifl;
 + struct ether_header *eh;
  #if NVLAN  0
 - uint16_t etype = ntohs(eh-ether_type);
 + uint16_t etype = ntohs(eh0-ether_type);
  #endif /* NVLAN  0 */
  
   /*
 @@ -1306,15 +1329,20 @@ bridge_input(struct ifnet *ifp, struct e
   if ((sc-sc_if.if_flags  IFF_RUNNING) == 0)
   return (m

Re: Async upd(4) - patch 7/7

2015-04-30 Thread Martin Pieuchot
On 24/04/15(Fri) 20:48, David Higgs wrote:
 This is the final patch in the series.
 
 Utilize the pending flags and report callback for their intended purpose - to 
 process async behavior.
 
 Apply splusb() to ensure report callbacks can't fire before their data 
 structures have been properly updated.  This only needs to happen in 
 upd_refresh(); all other calls to upd_request_children() are from a report 
 callback.

This is some good work.  I don't think your patch #6 makes sense without
#7, so I merged them.

I don't think you need a pending flag for the sensors, since what you
are querying are the reports.  You basically do not want to send two
requests for the same report at once.

I'd also take the simpler approach of not deactivating all sensors if
you fail to *submit* a transfer.  When such a thing happens it's either
because the device/bus is going away or because we cannot allocate
memory.

I tweaked your diff below, included some comments and your copyright.
This is untested but I hope you'll correct/improve it.

I'm also curious, which new reports/sensors do you want to add?

Cheers,
Martin

Index: upd.c
===
RCS file: /cvs/src/sys/dev/usb/upd.c,v
retrieving revision 1.19
diff -u -p -r1.19 upd.c
--- upd.c   30 Apr 2015 10:09:31 -  1.19
+++ upd.c   30 Apr 2015 11:02:04 -
@@ -1,6 +1,7 @@
 /* $OpenBSD: upd.c,v 1.19 2015/04/30 10:09:31 mpi Exp $ */
 
 /*
+ * Copyright (c) 2015 David Higgs hig...@gmail.com
  * Copyright (c) 2014 Andre de Oliveira an...@openbsd.org
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -78,25 +79,28 @@ static struct upd_usage_entry upd_usage_
 };
 #define UPD_MAX_SENSORS(nitems(upd_usage_batdep) + 
nitems(upd_usage_roots))
 
+SLIST_HEAD(upd_sensor_head, upd_sensor);
+
 struct upd_report {
-   size_t  size;
-   SLIST_HEAD(, upd_sensor)sensors;
+   size_t  size;   /* Size of the report */
+   struct upd_sensor_head  sensors;/* List in dependency order */
+   int pending;/* Waiting for an answer */
 };
 
-SLIST_HEAD(upd_sensor_head, upd_sensor);
 struct upd_sensor {
-   struct ksensor  ksensor;
-   struct hid_item hitem;
-   int attached;
-   struct upd_sensor_head  children;
-   SLIST_ENTRY(upd_sensor) dep_next;
-   SLIST_ENTRY(upd_sensor) rep_next;
+   struct ksensor  ksensor;
+   struct hid_item hitem;
+   int attached;   /* Is there a matching report */
+   struct upd_sensor_head  children;   /* list of children sensors */
+   SLIST_ENTRY(upd_sensor) dep_next;   /* next in the child list */
+   SLIST_ENTRY(upd_sensor) rep_next;   /* next in the report list */
 };
 
 struct upd_softc {
struct uhidevsc_hdev;
int  sc_num_sensors;
u_intsc_max_repid;
+   char sc_buf[256];
 
/* sensor framework */
struct ksensordevsc_sensordev;
@@ -112,11 +116,13 @@ void upd_attach_sensor_tree(struct upd_s
 struct upd_usage_entry *, struct upd_sensor_head *);
 int  upd_detach(struct device *, int);
 
-void upd_refresh(void *);
-void upd_update_sensors(struct upd_softc *, uint8_t *, unsigned int, int);
-void upd_update_sensor_value(struct upd_softc *, struct upd_sensor *,
-uint8_t *, int);
 void upd_intr(struct uhidev *, void *, uint);
+void upd_refresh(void *);
+void upd_request_children(struct upd_softc *, struct upd_sensor_head *);
+void upd_update_report_cb(void *, int, void *, int);
+
+void upd_sensor_invalidate(struct upd_softc *, struct upd_sensor *);
+void upd_sensor_update(struct upd_softc *, struct upd_sensor *, uint8_t *, 
int);
 int upd_lookup_usage_entry(void *, int, struct upd_usage_entry *,
 struct hid_item *);
 struct upd_sensor *upd_lookup_sensor(struct upd_softc *, int, int);
@@ -126,10 +132,7 @@ struct cfdriver upd_cd = {
 };
 
 const struct cfattach upd_ca = {
-   sizeof(struct upd_softc),
-   upd_match,
-   upd_attach,
-   upd_detach
+   sizeof(struct upd_softc), upd_match, upd_attach, upd_detach
 };
 
 int
@@ -273,42 +276,51 @@ upd_detach(struct device *self, int flag
sensor = sc-sc_sensors[i];
if (sensor-attached)
sensor_detach(sc-sc_sensordev, sensor-ksensor);
-   DPRINTF((upd_detach: %s\n, sensor-ksensor.desc));
}
 
free(sc-sc_reports, M_USBDEV, 0);
free(sc-sc_sensors, M_USBDEV, 0);
-   DPRINTF((upd_detach: complete\n));
return (0);
 }
 
 void
 upd_refresh(void *arg)
 {
-   struct upd_softc*sc = (struct upd_softc *)arg;
+   struct upd_softc*sc = arg;
+   int   

Re: getsock() api modification

2015-04-30 Thread Martin Pieuchot
On 21/04/15(Tue) 17:15, Vitaliy Makkoveev wrote:
 Now fd_getfile() function returns unacquired struct file instance
 [...]
 It is unacceptable on multiprocessor machine because the instance referenced
 by fp can be destroyed between fd_getfile() and FREF() calls. So I want
 fd_getfile() returns acquired fp. [...]

Your diff makes sense but sadly it is broken, could you send a diff that
can be applied?

 Index: share/man/man9/file.9
 ===
 RCS file: /cvs/src/share/man/man9/file.9,v
 retrieving revision 1.12
 diff -u -p -r1.12 file.9
 --- share/man/man9/file.9 4 Jun 2013 19:27:06 -   1.12
 +++ share/man/man9/file.9 21 Apr 2015 13:27:08 -
 @@ -42,7 +42,7 @@
 .Ft struct file *
 .Fn fd_getfile struct filedesc *fdp int fd
 .Ft int
 -.Fn getsock struct filedesc *fdp int fd struct file **fpp
 +.Fn getsock struct proc *p int fd struct file **fpp
 .In sys/file.h
 .In sys/filedesc.h
 .In sys/vnode.h
 @@ -74,21 +74,28 @@ recommended to make complicated kernel A
 .Pp
 The files are extracted from the file descriptor table using the
 functions
 -.Fn fd_getfile ,
 -.Fn getvnode
 +.Fn fd_getfile
 and
 -.Fn getsock .
 +.Fn getvnode .
 .Fn fd_getfile
 performs all necessary checks to see if the file descriptor number is
 within the range of file descriptor table, and if the descriptor is
 valid.
 -.Fn getsock
 -and
 .Fn getvnode
 -are special cases that besides doing
 +is special case that besides doing
 +.Fn fd_getfile
 +also checks if the descriptor is a vnode, returns the proper
 +errno on error and increases the use count with
 +.Fn FREF .
 +.Pp
 +The files are extracted from the process context using the
 +function
 +.Fn getsock .
 +.Fn getsock
 +is special case that besides doing
 .Fn fd_getfile
 -also check if the descriptor is a vnode or socket, return the proper
 -errno on error and increase the use count with
 +also checks if the descriptor is a socket, returns the proper
 +errno on error and increases the use count with
 .Fn FREF .
 .Sh CONCURRENT ACCESS
 Since multiple processes can share the same file descriptor table,
 Index: sys/compat/linux/linux_socket.c
 ===
 RCS file: /cvs/src/sys/compat/linux/linux_socket.c,v
 retrieving revision 1.60
 diff -u -p -r1.60 linux_socket.c
 --- sys/compat/linux/linux_socket.c   30 Jan 2015 23:38:49 -  1.60
 +++ sys/compat/linux/linux_socket.c   21 Apr 2015 13:28:23 -
 @@ -937,7 +937,7 @@ linux_setsockopt(p, v, retval)
   if ((error = copyin((caddr_t) uap, (caddr_t) lsa, sizeof lsa)))
   return error;
 
 - if ((error = getsock(p-p_fd, lsa.s, fp)) != 0)
 + if ((error = getsock(p, lsa.s, fp)) != 0)
   return error;
   
   level = linux_to_bsd_sopt_level(lsa.level);
 Index: sys/kern/subr_log.c
 ===
 RCS file: /cvs/src/sys/kern/subr_log.c,v
 retrieving revision 1.29
 diff -u -p -r1.29 subr_log.c
 --- sys/kern/subr_log.c   14 Mar 2015 03:38:50 -  1.29
 +++ sys/kern/subr_log.c   21 Apr 2015 13:28:42 -
 @@ -334,7 +334,7 @@ logioctl(dev_t dev, u_long com, caddr_t 
   case LIOCSFD:
   if ((error = suser(p, 0)) != 0)
   return (error);
 - if ((error = getsock(p-p_fd, *(int *)data, fp)) != 0)
 + if ((error = getsock(p, *(int *)data, fp)) != 0)
   return (error);
   if (syslogf)
   FRELE(syslogf, p);
 Index: sys/kern/uipc_socket.c
 ===
 RCS file: /cvs/src/sys/kern/uipc_socket.c,v
 retrieving revision 1.137
 diff -u -p -r1.137 uipc_socket.c
 --- sys/kern/uipc_socket.c14 Mar 2015 03:38:51 -  1.137
 +++ sys/kern/uipc_socket.c21 Apr 2015 13:28:42 -
 @@ -1076,7 +1076,7 @@ sosplice(struct socket *so, int fd, off_
   return (EINVAL);
 
   /* Find sosp, the drain socket where data will be spliced into. */
 - if ((error = getsock(curproc-p_fd, fd, fp)) != 0)
 + if ((error = getsock(curproc, fd, fp)) != 0)
   return (error);
   sosp = fp-f_data;
   if (sosp-so_sp == NULL)
 Index: sys/kern/uipc_syscalls.c
 ===
 RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
 retrieving revision 1.100
 diff -u -p -r1.100 uipc_syscalls.c
 --- sys/kern/uipc_syscalls.c  14 Mar 2015 03:38:51 -  1.100
 +++ sys/kern/uipc_syscalls.c  21 Apr 2015 13:28:42 -
 @@ -120,7 +120,7 @@ sys_bind(struct proc *p, void *v, regist
   struct mbuf *nam;
   int error;
 
 - if ((error = getsock(p-p_fd, SCARG(uap, s), fp)) != 0)
 + if ((error = getsock(p, SCARG(uap, s), fp)) != 0)
   return (error);
   error = sockargs(nam, SCARG(uap, name), SCARG(uap, namelen),
   MT_SONAME);
 @@ -147,7 +147,7 @@ 

Re: [PATCH] Disable USB bus probes

2015-04-27 Thread Martin Pieuchot
On 22/04/15(Wed) 10:29, Dimitris Papastamos wrote:
 [...] 
 On a side note, What do you think of using a sysctl instead of an ioctl?
 I guess an ioctl will do but it would make it easy for people to disable
 this at boot time by simply adding the relevant knob in /etc/sysctl.conf.
 Otherwise I imagine people adding usbdevs -p off in rc.local.

I think it makes sense.  Note that we're currently not using sysctls for
USB so I wonder where the node should be placed.  hw.usb I guess.



Re: carp backup becomes no carrier

2015-04-27 Thread Martin Pieuchot
On 24/04/15(Fri) 21:21, Johan Huldtgren wrote:
 a few hours after I sent the previous e-mail the backup
 (April 23rd snap) took over and became the master, at
 that point I could not reach the carp interfaces anymore.
 Reverting roles so the host running the April 12th snap
 became the master would mostly fix the problems although
 occasionally things would seem to get confused and traffic
 (esp to vlan666, which my laptop isn't on but has access
 to) would cease. Shutting down the node running the April
 23rd snap would generally clear this up, but I'm not sure
 if this is a red herring and there is some caching going
 on somewhere which is clouding my troubleshooting efforts.

This looks like the bug I introduced in r1.251 that should be
fixed in 1.252 and 1.253, I'm puzzled.

 Regardless, I stood up an i386 vm, downloaded -current but
 grabbed ip_carp.c r1.249, and built a new kernel. Copied it
 over to the firewall which had the April 23rd snap and now
 everything is working as it was before. Traffic is flowing
 as expected regardless of which host is master and which is
 backup. It's only been a few hours, but so far so good.

If you could try 1.250 and 1.253 and tell me whether you can reproduce the
problem, that would be really helpful.  In case you see something weird, could
you include the routing table (netstat -rnf inet) in your report?  If
you can also play with tcpdump on the various pseudo-interfaces and see
if something is wrong, that would be great.

 I noticed some carp weirdness and sthen@ thought it might be worth
 bringing to light.  Quick background, I run two carp nodes, one
 (current master) is running the April 12th snapshot, the other is
 running the April 23rd snapshot. The node running the April 23rd
 snap when it's the backup node ifconfig reports all the carp
 interfaces status' as no carrier whereas before (as far as I can
 remember and on the April 12th snap at least) it would report
 backup. Once the backup becomes the master status changes to
 master.
 
 I don't notice anything not working, however this behavior is perhaps
 not expected.

It's indeed not expected and generally shown for interfaces of type
IFT_ETHER.  I don't understand how this can be shown for carp.

 dmesgs and ifconfig output for each host below, let me know if you
 need anything further.
 
 thanks,
 
 .jh
 
 April 12th snapshot host:
 
 $ dmesg
 syncing disks... done
 OpenBSD 5.7-current (GENERIC) #772: Sun Apr 12 17:38:03 MDT 2015
 dera...@i386.openbsd.org:/usr/src/sys/arch/i386/compile/GENERIC
 cpu0: Geode(TM) Integrated Processor by AMD PCS (AuthenticAMD
 586-class) 500 MHz
 cpu0: FPU,DE,PSE,TSC,MSR,CX8,SEP,PGE,CMOV,CFLUSH,MMX,MMXX,3DNOW2,3DNOW
 real mem  = 536363008 (511MB)
 avail mem = 515301376 (491MB)
 mpath0 at root
 scsibus0 at mpath0: 256 targets
 mainbus0 at root
 bios0 at mainbus0: date 20/71/05, BIOS32 rev. 0 @ 0xfac40
 pcibios0 at bios0: rev 2.0 @ 0xf/0x1
 pcibios0: pcibios_get_intr_routing - function not supported
 pcibios0: PCI IRQ Routing information unavailable.
 pcibios0: PCI bus #0 is the last bus
 bios0: ROM list: 0xc8000/0xa800
 cpu0 at mainbus0: (uniprocessor)
 mtrr: K6-family MTRR support (2 registers)
 amdmsr0 at mainbus0
 pci0 at mainbus0 bus 0: configuration mode 1 (no bios)
 0:20:0: io address conflict 0x6100/0x100
 0:20:0: io address conflict 0x6200/0x200
 pchb0 at pci0 dev 1 function 0 AMD Geode LX rev 0x31
 glxsb0 at pci0 dev 1 function 2 AMD Geode LX Crypto rev 0x00: RNG AES
 vr0 at pci0 dev 6 function 0 VIA VT6105M RhineIII rev 0x96: irq 11,
 address 00:00:24:c9:58:4c
 ukphy0 at vr0 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
 0x004063, model 0x0034
 vr1 at pci0 dev 7 function 0 VIA VT6105M RhineIII rev 0x96: irq 5,
 address 00:00:24:c9:58:4d
 ukphy1 at vr1 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
 0x004063, model 0x0034
 vr2 at pci0 dev 8 function 0 VIA VT6105M RhineIII rev 0x96: irq 9,
 address 00:00:24:c9:58:4e
 ukphy2 at vr2 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
 0x004063, model 0x0034
 vr3 at pci0 dev 9 function 0 VIA VT6105M RhineIII rev 0x96: irq 12,
 address 00:00:24:c9:58:4f
 ukphy3 at vr3 phy 1: Generic IEEE 802.3u media interface, rev. 3: OUI
 0x004063, model 0x0034
 ral0 at pci0 dev 17 function 0 Ralink RT2561S rev 0x00: irq 15,
 address 00:12:0e:61:7f:b0
 ral0: MAC/BBP RT2561C, RF RT5225
 glxpcib0 at pci0 dev 20 function 0 AMD CS5536 ISA rev 0x03: rev 3,
 32-bit 3579545Hz timer, watchdog, gpio, i2c
 gpio0 at glxpcib0: 32 pins
 iic0 at glxpcib0
 pciide0 at pci0 dev 20 function 2 AMD CS5536 IDE rev 0x01: DMA,
 channel 0 wired to compatibility, channel 1 wired to compatibility
 wd0 at pciide0 channel 0 drive 0: SanDisk SDCFX4-8192
 wd0: 4-sector PIO, LBA, 7815MB, 16007040 sectors
 wd0(pciide0:0:0): using PIO mode 4, Ultra-DMA mode 2
 pciide0: channel 1 ignored (disabled)
 ohci0 at pci0 dev 21 function 0 AMD CS5536 USB rev 0x02: irq 7,
 version 1.0, legacy support
 ehci0 at pci0 dev 21 function 1 AMD CS5536 USB rev 0x02: 

Re: [PATCH] Man pages: usbd_open_pipe(9), usbd_close_pipe(9)

2015-05-04 Thread Martin Pieuchot
On 02/05/15(Sat) 08:44, attila wrote:
 Hi tech@,
 
 This patch adds man pages for usbd_open_pipe, usbd_open_pipe_intr,
 usbd_close_pipe and usbd_abort_pipe, done as two files:
 usbd_open_pipe.9 and usbd_close_pipe.9.  It also adds these two new .9
 files to the appropriate Makefile and tweaks usbd_transfer(9) to refer
 to usbd_open_pipe(9).
 
 Comments, feedback most welcome.

Thanks, committed with some tweaks :

  - These functions cannot be called from interrupt context because they
might sleep.

  - I did not return the error values from usbd_open_pipe(9) because they
are almost never checked and useless.

I think we can improve them in-tree, it might be interesting to add more
USB specific information, since people dealing with USB know what
endpoints are but not how to deal with usbd_pipes.

One more note, I think that our stack does not support shared endpoints,
it would be nice to do an audit, but I think that USBD_EXCLUSIVE_ACCESS
should/is the default.

 
 Pax, -A
 
 P.S. I f'ing love mandoc.  Just sayin...
 --
 att...@stalphonsos.com | http://trac.haqistan.net/~attila
 keyid E6CC1EDB | 4D91 1B98 A210 1D71 2A0E  AC29 9677 D0A6 E6CC 1EDB
 

 Index: Makefile
 ===
 RCS file: /cvs/src/share/man/man9/Makefile,v
 retrieving revision 1.230
 diff -u -p -r1.230 Makefile
 --- Makefile  10 Feb 2015 21:56:08 -  1.230
 +++ Makefile  2 May 2015 00:07:16 -
 @@ -31,7 +31,7 @@ MAN=aml_evalnode.9 atomic_add_int.9 ato
   tsleep.9 spl.9 startuphook_establish.9 \
   socreate.9 sosplice.9 style.9 syscall.9 systrace.9 sysctl_int.9 \
   task_add.9 tc_init.9 time.9 timeout.9 tvtohz.9 uiomove.9 uvm.9 \
 - usbd_transfer.9 \
 + usbd_transfer.9 usbd_open_pipe.9 usbd_close_pipe.9 \
   vfs.9 vfs_busy.9 \
   vfs_cache.9 vaccess.9 vclean.9 vcount.9 vdevgone.9 vfinddev.9 vflush.9 \
   vflushbuf.9 vget.9 vgone.9 vhold.9 vinvalbuf.9 vnode.9 vnsubr.9 \
 Index: usbd_close_pipe.9
 ===
 RCS file: usbd_close_pipe.9
 diff -N usbd_close_pipe.9
 --- /dev/null 1 Jan 1970 00:00:00 -
 +++ usbd_close_pipe.9 2 May 2015 00:07:16 -
 @@ -0,0 +1,59 @@
 +.\ $OpenBSD$
 +.\
 +.\ Copyright (c) 2015 Sean Levy att...@stalphonsos.com
 +.\
 +.\ Permission to use, copy, modify, and distribute this software for any
 +.\ purpose with or without fee is hereby granted, provided that the above
 +.\ copyright notice and this permission notice appear in all copies.
 +.\
 +.\ THE SOFTWARE IS PROVIDED AS IS AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 +.\ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 +.\ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 +.\ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 +.\ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 +.\ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 +.\ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 +.\
 +.Dd $Mdocdate$
 +.Dt USBD_CLOSE_PIPE 9
 +.Os
 +.Sh NAME
 +.Nm usbd_close_pipe , usbd_abort_pipe
 +.Nd close or abort transfers on a USB pipe
 +.Sh SYNOPSIS
 +.In dev/usb/usb.h
 +.In dev/usb/usbdi.h
 +.Ft usbd_status
 +.Fn usbd_close_pipe struct usbd_pipe *pipe
 +.Ft usbd_status
 +.Fn usbd_abort_pipe struct usbd_pipe *pipe
 +.Sh DESCRIPTION
 +A pipe is a logical connection between the host and an endpoint
 +on a USB device, created by one of
 +.Xr usbd_open_pipe 9
 +or
 +.Xr usbd_open_pipe_intr 9 .
 +.Pp
 +The
 +.Fn usbd_abort_pipe
 +function aborts any transfers queued on the pipe and ensures it is quiescent
 +before returning.
 +.Pp
 +The
 +.Fn usbd_close_pipe
 +function first calls
 +.Fn usbd_abort_pipe ,
 +then removes the pipe from the relevant USB interface's list of pipes
 +and cleans up any memory associated with the pipe, including any
 +implicit transfer created by
 +.Xr usbd_open_pipe_intr 9 .
 +.Sh CONTEXT
 +.Fn usbd_abort_pipe
 +and
 +.Fn usbd_close_pipe
 +can be called during autoconf, from process context or from interrupt
 +context.
 +.Sh SEE ALSO
 +.Xr usbd_open_pipe 9 ,
 +.Xr usb 4 ,
 +.Xr intro 4
 Index: usbd_open_pipe.9
 ===
 RCS file: usbd_open_pipe.9
 diff -N usbd_open_pipe.9
 --- /dev/null 1 Jan 1970 00:00:00 -
 +++ usbd_open_pipe.9  2 May 2015 00:07:16 -
 @@ -0,0 +1,162 @@
 +.\ $OpenBSD$
 +.\
 +.\ Copyright (c) 2015 Sean Levy att...@stalphonsos.com
 +.\
 +.\ Permission to use, copy, modify, and distribute this software for any
 +.\ purpose with or without fee is hereby granted, provided that the above
 +.\ copyright notice and this permission notice appear in all copies.
 +.\
 +.\ THE SOFTWARE IS PROVIDED AS IS AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 +.\ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 +.\ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 

Re: carp backup becomes no carrier

2015-04-28 Thread Martin Pieuchot
Hello Johan and thanks for your great report!

On 27/04/15(Mon) 11:54, Johan Huldtgren wrote:
 If you try 1.250 and 1.253 and tell me if you can reproduce the problem
 that would be really helpful.  In case you see something weird, Could
 you include the routing table netstat -rnf inet in your report?  If
 you can also play with tcpdump on the various pseudo-interfaces and see
 if something is wrong that would be great.
 
 i'm not sure what is going on, it seems likely that the problem lies
 in my setup or something I've done, I'll explain the behavior I'm
 seeing and hopefully you can use it to rule out that the error lies
 in ip_carp.c at least.

I doubt the problem lies in your setup.  I found the master-active
problem; the diff below should correct that.  Can you tell me if it helps?

Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.253
diff -u -p -r1.253 ip_carp.c
--- netinet/ip_carp.c   22 Apr 2015 06:44:17 -  1.253
+++ netinet/ip_carp.c   28 Apr 2015 09:31:07 -
@@ -750,6 +750,7 @@ carp_clone_create(ifc, unit)
if_attach(ifp);
ether_ifattach(ifp);
ifp-if_type = IFT_CARP;
+   ifp-if_sadl-sdl_type = IFT_CARP;
ifp-if_output = carp_output;
 
/* Hook carp_addr_updated to cope with address and route changes. */



trunk is out, special offer with LACP included!

2015-05-05 Thread Martin Pieuchot
Now that the stack is ready, here's a diff to take the first victim,
trunk(4), out of ether_input().

If you glanced over previous versions of this diff, the differences
are mostly in sys/net/trunklacp.c and include some printf(9) fixes
as a bonus.

Ok?

Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.195
diff -u -p -r1.195 if_ethersubr.c
--- net/if_ethersubr.c  4 May 2015 10:24:08 -   1.195
+++ net/if_ethersubr.c  5 May 2015 12:10:25 -
@@ -463,9 +463,6 @@ ether_input(struct mbuf *m, void *hdr)
int llcfound = 0;
struct llc *l;
struct arpcom *ac;
-#if NTRUNK  0
-   int i = 0;
-#endif
 #if NPPPOE  0
struct ether_header *eh_tmp;
 #endif
@@ -480,21 +477,6 @@ ether_input(struct mbuf *m, void *hdr)
m_adj(m, ETHER_HDR_LEN);
}
 
-#if NTRUNK  0
-   /* Handle input from a trunk port */
-   while (ifp-if_type == IFT_IEEE8023ADLAG) {
-   if (++i  TRUNK_MAX_STACKING) {
-   m_freem(m);
-   return (1);
-   }
-   if (trunk_input(ifp, eh, m) != 0)
-   return (1);
-
-   /* Has been set to the trunk interface */
-   ifp = m-m_pkthdr.rcvif;
-   }
-#endif
-
if ((ifp-if_flags  IFF_UP) == 0) {
m_freem(m);
return (1);
@@ -518,17 +500,9 @@ ether_input(struct mbuf *m, void *hdr)
else
m-m_flags |= M_MCAST;
ifp-if_imcasts++;
-#if NTRUNK  0
-   if (ifp != ifp0)
-   ifp0-if_imcasts++;
-#endif
}
 
ifp-if_ibytes += m-m_pkthdr.len + sizeof(*eh);
-#if NTRUNK  0
-   if (ifp != ifp0)
-   ifp0-if_ibytes += m-m_pkthdr.len + sizeof(*eh);
-#endif
 
etype = ntohs(eh-ether_type);
 
Index: net/if_trunk.c
===
RCS file: /cvs/src/sys/net/if_trunk.c,v
retrieving revision 1.95
diff -u -p -r1.95 if_trunk.c
--- net/if_trunk.c  14 Mar 2015 03:38:51 -  1.95
+++ net/if_trunk.c  5 May 2015 13:09:53 -
@@ -94,14 +94,14 @@ int  trunk_rr_detach(struct trunk_softc 
 voidtrunk_rr_port_destroy(struct trunk_port *);
 int trunk_rr_start(struct trunk_softc *, struct mbuf *);
 int trunk_rr_input(struct trunk_softc *, struct trunk_port *,
-   struct ether_header *, struct mbuf *);
+   struct mbuf *);
 
 /* Active failover */
 int trunk_fail_attach(struct trunk_softc *);
 int trunk_fail_detach(struct trunk_softc *);
 int trunk_fail_start(struct trunk_softc *, struct mbuf *);
 int trunk_fail_input(struct trunk_softc *, struct trunk_port *,
-   struct ether_header *, struct mbuf *);
+   struct mbuf *);
 
 /* Loadbalancing */
 int trunk_lb_attach(struct trunk_softc *);
@@ -110,7 +110,7 @@ int  trunk_lb_port_create(struct trunk_p
 voidtrunk_lb_port_destroy(struct trunk_port *);
 int trunk_lb_start(struct trunk_softc *, struct mbuf *);
 int trunk_lb_input(struct trunk_softc *, struct trunk_port *,
-   struct ether_header *, struct mbuf *);
+   struct mbuf *);
 int trunk_lb_porttable(struct trunk_softc *, struct trunk_port *);
 
 /* Broadcast mode */
@@ -118,14 +118,14 @@ inttrunk_bcast_attach(struct trunk_sof
 int trunk_bcast_detach(struct trunk_softc *);
 int trunk_bcast_start(struct trunk_softc *, struct mbuf *);
 int trunk_bcast_input(struct trunk_softc *, struct trunk_port *,
-   struct ether_header *, struct mbuf *);
+   struct mbuf *);
 
 /* 802.3ad LACP */
 int trunk_lacp_attach(struct trunk_softc *);
 int trunk_lacp_detach(struct trunk_softc *);
 int trunk_lacp_start(struct trunk_softc *, struct mbuf *);
 int trunk_lacp_input(struct trunk_softc *, struct trunk_port *,
-   struct ether_header *, struct mbuf *);
+   struct mbuf *);
 
 /* Trunk protocol table */
 static const struct {
@@ -288,6 +288,7 @@ trunk_port_create(struct trunk_softc *tr
 {
struct trunk_softc *tr_ptr;
struct trunk_port *tp;
+   struct ifih *trunk_ifih;
int error = 0;
 
/* Limit the maximal number of trunk ports */
@@ -326,12 +327,19 @@ trunk_port_create(struct trunk_softc *tr
M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
return (ENOMEM);
 
+   trunk_ifih = malloc(sizeof(*trunk_ifih), M_DEVBUF, M_NOWAIT);
+   if (trunk_ifih == NULL) {
+   free(tp, M_DEVBUF, 0);
+   return (ENOMEM);
+   }
+
/* Check if port is a stacked trunk */
SLIST_FOREACH(tr_ptr, trunk_list, tr_entries) {
if (ifp == tr_ptr-tr_ac.ac_if) {
tp-tp_flags |= TRUNK_PORT_STACK;
if (trunk_port_checkstacking(tr_ptr) =

Re: tcp keep-alives sent without timestamps

2015-05-06 Thread Martin Pieuchot
On 20/04/15(Mon) 18:37, Mike Belopuhov wrote:
 On Tue, Apr 14, 2015 at 22:08 +0300, Lauri Tirkkonen wrote:
  On Tue, Apr 14 2015 20:40:58 +0200, Mike Belopuhov wrote:
   According to 3.2 in RFC 7323:
   
  Once TSopt has been successfully negotiated, that is both SYN and
  SYN,ACK contain TSopt, the TSopt MUST be sent in every non-RST
  segment for the duration of the connection, and SHOULD be sent in an
  RST segment (see Section 5.2 for details).  The TCP SHOULD remember
  this state by setting a flag, referred to as Snd.TS.OK, to one.  If a
  non-RST segment is received without a TSopt, a TCP SHOULD silently
  drop the segment.  A TCP MUST NOT abort a TCP connection because any
  segment lacks an expected TSopt.
  
  Thank you, I somehow missed the existence of this RFC.
 
 
 Does anyone else want to comment on this?

Diff looks good to me.  Since you mentioned other *BSD in your previous
post, I looked at what Linux and Solaris do and they both seem to set
timestamps on keep-alive packets.  As for MD5 signatures, Linux also
includes them.

ok mpi@

   I had a stab at adding timestamp support to tcp_respond but couldn't
   test yet.  If you feel like giving it a try, please be my guest.
  
  With your patch, I confirm that timestamps are present on keep-alive
  messages.
  
 
 The patch needs a small (but crucial) amendment: tcp pcb can be NULL...
 
 diff --git sys/netinet/tcp_subr.c sys/netinet/tcp_subr.c
 index c8c8e77..6f17af0 100644
 --- sys/netinet/tcp_subr.c
 +++ sys/netinet/tcp_subr.c
 @@ -295,13 +295,10 @@ tcp_template(tp)
   * attached mbufs.
   *
   * In any case the ack and sequence number of the transmitted
   * segment are as specified by the parameters.
   */
 -#ifdef INET6
 -/* This function looks hairy, because it was so IPv4-dependent. */
 -#endif /* INET6 */
  void
  tcp_respond(struct tcpcb *tp, caddr_t template, struct tcphdr *th0,
  tcp_seq ack, tcp_seq seq, int flags, u_int rtableid)
  {
   int tlen;
 @@ -370,14 +367,10 @@ tcp_respond(struct tcpcb *tp, caddr_t template, struct 
 tcphdr *th0,
   xchg(th-th_dport, th-th_sport, u_int16_t);
   else
   flags = TH_ACK;
  #undef xchg
  
 - m-m_len = tlen;
 - m-m_pkthdr.len = tlen;
 - m-m_pkthdr.rcvif = (struct ifnet *) 0;
   th-th_ack = htonl(ack);
   th-th_x2 = 0;
   th-th_off = sizeof (struct tcphdr)  2;
   th-th_flags = flags;
 @@ -386,10 +379,26 @@ tcp_respond(struct tcpcb *tp, caddr_t template, struct 
 tcphdr *th0,
   if (win  TCP_MAXWIN)
   win = TCP_MAXWIN;
   th-th_win = htons((u_int16_t)win);
   th-th_urp = 0;
  
 + if (tp  (tp-t_flags  (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP 
 + (flags  TH_RST) == 0  (tp-t_flags  TF_RCVD_TSTMP)) {
 + u_int32_t *lp = (u_int32_t *)(th + 1);
 + /* Form timestamp option as shown in appendix A of RFC 1323. */
 + *lp++ = htonl(TCPOPT_TSTAMP_HDR);
 + *lp++ = htonl(tcp_now + tp-ts_modulate);
 + *lp   = htonl(tp-ts_recent);
 + tlen += TCPOLEN_TSTAMP_APPA;
 + th-th_off = (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_APPA)  2;
 + }
 +
 + m-m_len = tlen;
 + m-m_pkthdr.len = tlen;
 + m-m_pkthdr.rcvif = (struct ifnet *) 0;
 + m-m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
 +
   /* force routing table */
   if (tp)
   m-m_pkthdr.ph_rtableid = tp-t_inpcb-inp_rtableid;
   else
   m-m_pkthdr.ph_rtableid = rtableid;
 



Introducing if_output()

2015-05-07 Thread Martin Pieuchot
This diff is a first step towards removing all pseudo-driver #ifdef
in ether_output().  As with ether_input(), the goal of this work is to
provide an elegant design to make it easier to turn pseudo-drivers
MP-safe.

So instead of including some bridge(4), vlan(4) and carp(4) specific
code in ether_output(), I'd like to split this function and call the
interesting chunks in bridge_output(), vlan_output() and carp_output().

The first step is to take the generic code enqueuing packets in its
own function: if_output().

Sadly if_start() is still required for hfsc_deferred().

Comments, ok?

Index: net/bridgestp.c
===
RCS file: /cvs/src/sys/net/bridgestp.c,v
retrieving revision 1.52
diff -u -p -r1.52 bridgestp.c
--- net/bridgestp.c 14 Mar 2015 03:38:51 -  1.52
+++ net/bridgestp.c 28 Apr 2015 12:22:59 -
@@ -357,7 +357,6 @@ bstp_transmit_tcn(struct bstp_state *bs,
struct ifnet *ifp = bp-bp_ifp;
struct ether_header *eh;
struct mbuf *m;
-   int s, len, error;
 
if (ifp == NULL || (ifp-if_flags  IFF_RUNNING) == 0)
return;
@@ -382,16 +381,8 @@ bstp_transmit_tcn(struct bstp_state *bs,
bpdu.tbu_bpdutype = BSTP_MSGTYPE_TCN;
bcopy(bpdu, mtod(m, caddr_t) + sizeof(*eh), sizeof(bpdu));
 
-   s = splnet();
bp-bp_txcount++;
-   len = m-m_pkthdr.len;
-   IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
-   if (error == 0) {
-   ifp-if_obytes += len;
-   ifp-if_omcasts++;
-   if_start(ifp);
-   }
-   splx(s);
+   if_output(ifp, m);
 }
 
 void
@@ -473,7 +464,7 @@ bstp_send_bpdu(struct bstp_state *bs, st
struct ifnet *ifp = bp-bp_ifp;
struct mbuf *m;
struct ether_header *eh;
-   int s, len, error;
+   int s;
 
s = splnet();
if (ifp == NULL || (ifp-if_flags  IFF_RUNNING) == 0)
@@ -521,13 +512,7 @@ bstp_send_bpdu(struct bstp_state *bs, st
m-m_pkthdr.pf.prio = BSTP_IFQ_PRIO;
 
bp-bp_txcount++;
-   len = m-m_pkthdr.len;
-   IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
-   if (error == 0) {
-   ifp-if_obytes += len;
-   ifp-if_omcasts++;
-   if_start(ifp);
-   }
+   if_output(ifp, m);
  done:
splx(s);
 }
Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.330
diff -u -p -r1.330 if.c
--- net/if.c23 Apr 2015 09:45:24 -  1.330
+++ net/if.c28 Apr 2015 12:22:59 -
@@ -421,7 +421,6 @@ if_attach_common(struct ifnet *ifp)
 void
 if_start(struct ifnet *ifp)
 {
-
splassert(IPL_NET);
 
if (ifp-if_snd.ifq_len = min(8, ifp-if_snd.ifq_maxlen) 
@@ -439,6 +438,35 @@ if_start(struct ifnet *ifp)
TAILQ_INSERT_TAIL(iftxlist, ifp, if_txlist);
schednetisr(NETISR_TX);
}
+}
+
+int
+if_output(struct ifnet *ifp, struct mbuf *m)
+{
+   int s, error = 0;
+
+   s = splnet();
+
+   /*
+* Queue message on interface, and start output if interface
+* not yet active.
+*/
+   IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
+   if (error) {
+   splx(s);
+   return (error);
+   }
+
+   ifp-if_obytes += m-m_pkthdr.len;
+   if (m-m_flags  M_MCAST)
+   ifp-if_omcasts++;
+
+   ifp-if_opackets++;
+   if_start(ifp);
+
+   splx(s);
+
+   return (0);
 }
 
 struct mbuf_queue if_input_queue = MBUF_QUEUE_INITIALIZER(8192, IPL_NET);
Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.235
diff -u -p -r1.235 if_bridge.c
--- net/if_bridge.c 17 Apr 2015 11:04:01 -  1.235
+++ net/if_bridge.c 28 Apr 2015 12:22:59 -
@@ -2683,7 +2683,6 @@ int
 bridge_ifenqueue(struct bridge_softc *sc, struct ifnet *ifp, struct mbuf *m)
 {
int error, len;
-   short mflags;
 
 #if NGIF  0
/* Packet needs etherip encapsulation. */
@@ -2735,18 +2734,15 @@ bridge_ifenqueue(struct bridge_softc *sc
}
 #endif
len = m-m_pkthdr.len;
-   mflags = m-m_flags;
-   IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
+
+   error = if_output(ifp, m);
if (error) {
sc-sc_if.if_oerrors++;
return (error);
}
+
sc-sc_if.if_opackets++;
sc-sc_if.if_obytes += len;
-   ifp-if_obytes += len;
-   if (mflags  M_MCAST)
-   ifp-if_omcasts++;
-   if_start(ifp);
 
return (0);
 }
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.194
diff -u -p -r1.194 if_ethersubr.c
--- net/if_ethersubr.c  13 Apr 2015 08:52:51 -  1.194
+++ net/if_ethersubr.c  28 Apr 2015 12:22:59 -
@@ -256,14 

Re: vfs_shutdown would like to do polled I/O at least on panic

2015-05-08 Thread Martin Pieuchot
On 07/05/15(Thu) 20:58, Mike Belopuhov wrote:
 As I've pointed out before, on panic we can be running on any
 CPU and our disk controller's interrupts can interrupt on the
 other one.  Since we'll most likely be holding a kernel lock,
 dealing with unlocking it might get hairy very fast.  Instead
 what we could do to improve the chances of a clean shutdown on
 panic is to instruct our disk subsystem to do polled I/O that
 will be run on the same CPU with the panic.

Did you consider executing ddb's boot commands on cpu0?  I mean doing
an implicit "machine ddbcpu 0" before executing any boot command?

 Initially I wanted to move cold = 1 earlier in boot(), but
 after talking to Miod, it started to look like a bad idea.
 
 Thoughts?
 
 diff --git sys/dev/ata/ata_wdc.c sys/dev/ata/ata_wdc.c
 index 1f52488..aea9ec1 100644
 --- sys/dev/ata/ata_wdc.c
 +++ sys/dev/ata/ata_wdc.c
 @@ -199,20 +199,22 @@ wd_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, 
 size_t size, int op, voi
   */
  int
  wdc_ata_bio(struct ata_drive_datas *drvp, struct ata_bio *ata_bio)
  {
   struct wdc_xfer *xfer;
   struct channel_softc *chp = drvp-chnl_softc;
  
   xfer = wdc_get_xfer(WDC_NOSLEEP);
   if (xfer == NULL)
   return WDC_TRY_AGAIN;
 + if (panicstr)
 + ata_bio-flags |= ATA_POLL;
   if (ata_bio-flags  ATA_POLL)
   xfer-c_flags |= C_POLL;
   if (!(ata_bio-flags  ATA_POLL) 
   (drvp-drive_flags  (DRIVE_DMA | DRIVE_UDMA)) 
   (ata_bio-flags  ATA_SINGLE) == 0 
   (ata_bio-bcount  512 ||
   (chp-wdc-quirks  WDC_QUIRK_NOSHORTDMA) == 0))
   xfer-c_flags |= C_DMA;
   xfer-drive = drvp-drive;
   xfer-cmd = ata_bio;
 diff --git sys/scsi/scsi_base.c sys/scsi/scsi_base.c
 index 9cf6b45..3afcc29 100644
 --- sys/scsi/scsi_base.c
 +++ sys/scsi/scsi_base.c
 @@ -1267,20 +1267,22 @@ scsi_report_luns(struct scsi_link *sc_link, int 
 selectreport,
   return (error);
  }
  
  void
  scsi_xs_exec(struct scsi_xfer *xs)
  {
   xs-error = XS_NOERROR;
   xs-resid = xs-datalen;
   xs-status = 0;
   CLR(xs-flags, ITSDONE);
 + if (panicstr)
 + SET(xs-flags, SCSI_AUTOCONF);
  
  #ifdef SCSIDEBUG
   if (xs-sc_link-flags  SDEV_DB1) {
   scsi_xs_show(xs);
   if (xs-datalen  (xs-flags  SCSI_DATA_OUT))
   scsi_show_mem(xs-data, min(64, xs-datalen));
   }
  #endif
  
   /* The adapter's scsi_cmd() is responsible for calling scsi_done(). */
 



Re: Async upd(4) - patch 7/7

2015-05-11 Thread Martin Pieuchot

On 2015-05-07 04:19, David Higgs wrote:

On Apr 30, 2015, at 7:09 AM, Martin Pieuchot m...@openbsd.org wrote:
[...]


Your tweaks were good, so I tweaked it further:
- When submit fails, invalidate affected sensors as described above.
- When invalidating sensors, do it recursively.
- When battery is not present, invalidate children but not 
BatteryPresent.


Let me know what you think.


Committed, thanks!

I'm looking forward to seeing more supported sensors added to upd(4) :)

M.



Take vlan(4) out of ether_input()

2015-05-12 Thread Martin Pieuchot
The diff below converts vlan(4) to the new if_input() framework, which
means that vlan_input() will now be executed before ether_input().

Unlike with trunk(4), multiple vlan(4)s can be attached to the same
parent interface.  When this happens, only one input handler is
added to keep the if_inputs handler list as small as possible.

With this diff pseudo-drivers using the if_input() framework can now be
stacked.  That's why if_input_process() has been modified to deal with
multiple ifp/lists of handlers.

Reviewers might notice that this diff introduces a behavior change when
a trunk is configured on top of a vlan.  vlan_input() now runs before
trunk_input(), which means that the trunk interface now sees packets with
the encapsulation removed.

I know that various configurations involving vlans are currently broken.
This diff won't fix them but it should not introduce new problems, so
please test and report back.

Comments and oks are also welcome.

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.330
diff -u -p -r1.330 if.c
--- net/if.c23 Apr 2015 09:45:24 -  1.330
+++ net/if.c11 May 2015 13:15:01 -
@@ -497,10 +497,19 @@ if_input_process(void *xmq)
if ((++mit  0x1f) == 0)
yield();
 
+again:
+   /*
+* Pass this mbuf to all input handlers of its
+* interface until it is consumed.
+*/
ifp = m-m_pkthdr.rcvif;
SLIST_FOREACH(ifih, ifp-if_inputs, ifih_next) {
if ((*ifih-ifih_input)(m, NULL))
break;
+
+   /* Pseudo-drivers might be stacked. */
+   if (ifp != m-m_pkthdr.rcvif)
+   goto again;
}
}
splx(s);
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.196
diff -u -p -r1.196 if_ethersubr.c
--- net/if_ethersubr.c  11 May 2015 08:41:43 -  1.196
+++ net/if_ethersubr.c  11 May 2015 08:58:48 -
@@ -456,7 +456,7 @@ bad:
 int
 ether_input(struct mbuf *m, void *hdr)
 {
-   struct ifnet *ifp0, *ifp;
+   struct ifnet *ifp;
struct ether_header *eh = hdr;
struct niqueue *inq;
u_int16_t etype;
@@ -469,7 +469,7 @@ ether_input(struct mbuf *m, void *hdr)
 
 
/* mark incoming routing table */
-   ifp = ifp0 = m-m_pkthdr.rcvif;
+   ifp = m-m_pkthdr.rcvif;
m-m_pkthdr.ph_rtableid = ifp-if_rdomain;
 
if (eh == NULL) {
@@ -511,12 +511,6 @@ ether_input(struct mbuf *m, void *hdr)
atomic_setbits_int(netisr, (1  NETISR_RND_DONE));
}
 
-#if NVLAN  0
-   if (((m-m_flags  M_VLANTAG) || etype == ETHERTYPE_VLAN ||
-   etype == ETHERTYPE_QINQ)  (vlan_input(eh, m) == 0))
-   return (1);
-#endif
-
 #if NBRIDGE  0
/*
 * Tap the packet off here for a bridge, if configured and
@@ -565,7 +559,7 @@ ether_input(struct mbuf *m, void *hdr)
 * is for us.  Drop otherwise.
 */
if ((m-m_flags  (M_BCAST|M_MCAST)) == 0 
-   ((ifp-if_flags  IFF_PROMISC) || (ifp0-if_flags  IFF_PROMISC))) {
+   (ifp-if_flags  IFF_PROMISC)) {
if (memcmp(ac-ac_enaddr, eh-ether_dhost, ETHER_ADDR_LEN)) {
m_freem(m);
return (1);
Index: net/if_var.h
===
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.25
diff -u -p -r1.25 if_var.h
--- net/if_var.h23 Apr 2015 09:45:24 -  1.25
+++ net/if_var.h11 May 2015 09:10:48 -
@@ -115,6 +115,7 @@ struct  ifqueue {
 struct ifih {
SLIST_ENTRY(ifih) ifih_next;
int (*ifih_input)(struct mbuf *, void *);
+   int   ifih_refcnt;
 };
 
 /*
Index: net/if_vlan.c
===
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.118
diff -u -p -r1.118 if_vlan.c
--- net/if_vlan.c   22 Apr 2015 06:42:11 -  1.118
+++ net/if_vlan.c   11 May 2015 13:57:02 -
@@ -79,6 +79,8 @@ u_long vlan_tagmask, svlan_tagmask;
 #define TAG_HASH(tag)  (tag  vlan_tagmask)
 LIST_HEAD(vlan_taghash, ifvlan)*vlan_tagh, *svlan_tagh;
 
+
+intvlan_input(struct mbuf *, void *);
 intvlan_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
 void   vlan_start(struct ifnet *ifp);
@@ -268,32 +270,44 @@ vlan_start(struct ifnet *ifp)
 }
 
 /*
- * vlan_input() returns 0 if it has consumed the packet, 1 otherwise.
+ * vlan_input() returns 1 if it has consumed the packet, 0 otherwise.
  */
 int
-vlan_input(struct ether_header *eh, struct mbuf *m)
+vlan_input(struct mbuf *m, void 

Re: Introducing if_output()

2015-05-12 Thread Martin Pieuchot
On 07/05/15(Thu) 11:50, Martin Pieuchot wrote:
 This diff is a first step towards removing all pseudo-driver #ifdef
 in ether_output().  As for ether_input() the goal of this work is to
 provide an elegant design to make it easier to turn pseudo-drivers
 MP-safe.
 
 So instead of including some bridge(4), vlan(4) and carp(4) specific
 code in ether_output(), I'd like to split this function and call the
 interesting chunks in bridge_output(), vlan_output() and carp_output().
 
 The first step is to take the generic code enqueuing packets in its
 own function: if_output().
 
 Sadly if_start() is still required for hfsc_deferred().
 
 Comments, ok?

I got one positive test report involving carp, gif, vether and bridge but
nothing else.

Does anybody want to comment or ok?

 Index: net/bridgestp.c
 ===
 RCS file: /cvs/src/sys/net/bridgestp.c,v
 retrieving revision 1.52
 diff -u -p -r1.52 bridgestp.c
 --- net/bridgestp.c   14 Mar 2015 03:38:51 -  1.52
 +++ net/bridgestp.c   28 Apr 2015 12:22:59 -
 @@ -357,7 +357,6 @@ bstp_transmit_tcn(struct bstp_state *bs,
   struct ifnet *ifp = bp-bp_ifp;
   struct ether_header *eh;
   struct mbuf *m;
 - int s, len, error;
  
   if (ifp == NULL || (ifp-if_flags  IFF_RUNNING) == 0)
   return;
 @@ -382,16 +381,8 @@ bstp_transmit_tcn(struct bstp_state *bs,
   bpdu.tbu_bpdutype = BSTP_MSGTYPE_TCN;
   bcopy(bpdu, mtod(m, caddr_t) + sizeof(*eh), sizeof(bpdu));
  
 - s = splnet();
   bp-bp_txcount++;
 - len = m-m_pkthdr.len;
 - IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
 - if (error == 0) {
 - ifp-if_obytes += len;
 - ifp-if_omcasts++;
 - if_start(ifp);
 - }
 - splx(s);
 + if_output(ifp, m);
  }
  
  void
 @@ -473,7 +464,7 @@ bstp_send_bpdu(struct bstp_state *bs, st
   struct ifnet *ifp = bp-bp_ifp;
   struct mbuf *m;
   struct ether_header *eh;
 - int s, len, error;
 + int s;
  
   s = splnet();
   if (ifp == NULL || (ifp-if_flags  IFF_RUNNING) == 0)
 @@ -521,13 +512,7 @@ bstp_send_bpdu(struct bstp_state *bs, st
   m-m_pkthdr.pf.prio = BSTP_IFQ_PRIO;
  
   bp-bp_txcount++;
 - len = m-m_pkthdr.len;
 - IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
 - if (error == 0) {
 - ifp-if_obytes += len;
 - ifp-if_omcasts++;
 - if_start(ifp);
 - }
 + if_output(ifp, m);
   done:
   splx(s);
  }
 Index: net/if.c
 ===
 RCS file: /cvs/src/sys/net/if.c,v
 retrieving revision 1.330
 diff -u -p -r1.330 if.c
 --- net/if.c  23 Apr 2015 09:45:24 -  1.330
 +++ net/if.c  28 Apr 2015 12:22:59 -
 @@ -421,7 +421,6 @@ if_attach_common(struct ifnet *ifp)
  void
  if_start(struct ifnet *ifp)
  {
 -
   splassert(IPL_NET);
  
   if (ifp-if_snd.ifq_len = min(8, ifp-if_snd.ifq_maxlen) 
 @@ -439,6 +438,35 @@ if_start(struct ifnet *ifp)
   TAILQ_INSERT_TAIL(iftxlist, ifp, if_txlist);
   schednetisr(NETISR_TX);
   }
 +}
 +
 +int
 +if_output(struct ifnet *ifp, struct mbuf *m)
 +{
 + int s, error = 0;
 +
 + s = splnet();
 +
 + /*
 +  * Queue message on interface, and start output if interface
 +  * not yet active.
 +  */
 + IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
 + if (error) {
 + splx(s);
 + return (error);
 + }
 +
 + ifp-if_obytes += m-m_pkthdr.len;
 + if (m-m_flags  M_MCAST)
 + ifp-if_omcasts++;
 +
 + ifp-if_opackets++;
 + if_start(ifp);
 +
 + splx(s);
 +
 + return (0);
  }
  
  struct mbuf_queue if_input_queue = MBUF_QUEUE_INITIALIZER(8192, IPL_NET);
 Index: net/if_bridge.c
 ===
 RCS file: /cvs/src/sys/net/if_bridge.c,v
 retrieving revision 1.235
 diff -u -p -r1.235 if_bridge.c
 --- net/if_bridge.c   17 Apr 2015 11:04:01 -  1.235
 +++ net/if_bridge.c   28 Apr 2015 12:22:59 -
 @@ -2683,7 +2683,6 @@ int
  bridge_ifenqueue(struct bridge_softc *sc, struct ifnet *ifp, struct mbuf *m)
  {
   int error, len;
 - short mflags;
  
  #if NGIF  0
   /* Packet needs etherip encapsulation. */
 @@ -2735,18 +2734,15 @@ bridge_ifenqueue(struct bridge_softc *sc
   }
  #endif
   len = m-m_pkthdr.len;
 - mflags = m-m_flags;
 - IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
 +
 + error = if_output(ifp, m);
   if (error) {
   sc-sc_if.if_oerrors++;
   return (error);
   }
 +
   sc-sc_if.if_opackets++;
   sc-sc_if.if_obytes += len;
 - ifp-if_obytes += len;
 - if (mflags  M_MCAST)
 - ifp-if_omcasts++;
 - if_start(ifp);
  
   return (0);
  }
 Index: net/if_ethersubr.c
 ===
 RCS file: /cvs/src/sys/net/if_ethersubr.c,v
 retrieving revision 1.194

Kill NTRUNK

2015-05-12 Thread Martin Pieuchot
There's only one #if NTRUNK left in the tree; this diff gets rid of it
by overriding trunk ports' if_output().

ok?

Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.196
diff -u -p -r1.196 if_ethersubr.c
--- net/if_ethersubr.c  11 May 2015 08:41:43 -  1.196
+++ net/if_ethersubr.c  12 May 2015 11:29:20 -
@@ -87,8 +87,6 @@ didn't get a copy, you may request one f
 #include sys/syslog.h
 #include sys/timeout.h
 
-#include crypto/siphash.h/* required by if_trunk.h */
-
 #include net/if.h
 #include net/netisr.h
 #include net/route.h
@@ -127,11 +125,6 @@ didn't get a copy, you may request one f
 #include net/if_pppoe.h
 #endif
 
-#include trunk.h
-#if NTRUNK  0
-#include net/if_trunk.h
-#endif
-
 #ifdef INET6
 #include netinet6/in6_var.h
 #include netinet6/nd6.h
@@ -273,13 +266,6 @@ ether_output(struct ifnet *ifp0, struct 
ifp-if_rdomain, rtable_l2(m-m_pkthdr.ph_rtableid),
dst-sa_family);
}
-#endif
-
-#if NTRUNK  0
-   /* restrict transmission on trunk members to bpf only */
-   if (ifp-if_type == IFT_IEEE8023ADLAG 
-   (m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL))
-   senderr(EBUSY);
 #endif
 
esrc = ac-ac_enaddr;
Index: net/if_trunk.c
===
RCS file: /cvs/src/sys/net/if_trunk.c,v
retrieving revision 1.96
diff -u -p -r1.96 if_trunk.c
--- net/if_trunk.c  11 May 2015 08:41:43 -  1.96
+++ net/if_trunk.c  12 May 2015 11:28:39 -
@@ -66,6 +66,8 @@ void   trunk_port_watchdog(struct ifnet *
 voidtrunk_port_state(void *);
 voidtrunk_port_ifdetach(void *);
 int trunk_port_ioctl(struct ifnet *, u_long, caddr_t);
+int trunk_port_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+   struct rtentry *);
 struct trunk_port *trunk_port_get(struct trunk_softc *, struct ifnet *);
 int trunk_port_checkstacking(struct trunk_softc *);
 voidtrunk_port2req(struct trunk_port *, struct trunk_reqport *);
@@ -75,6 +77,7 @@ inttrunk_ether_delmulti(struct trunk_s
 voidtrunk_ether_purgemulti(struct trunk_softc *);
 int trunk_ether_cmdmulti(struct trunk_port *, u_long);
 int trunk_ioctl_allports(struct trunk_softc *, u_long, caddr_t);
+int trunk_input(struct mbuf *, void *);
 voidtrunk_start(struct ifnet *);
 voidtrunk_init(struct ifnet *);
 voidtrunk_stop(struct ifnet *);
@@ -354,6 +357,9 @@ trunk_port_create(struct trunk_softc *tr
trunk_ifih-ifih_input = trunk_input;
SLIST_INSERT_HEAD(ifp-if_inputs, trunk_ifih, ifih_next);
 
+   tp-tp_output = ifp-if_output;
+   ifp-if_output = trunk_port_output;
+
ifp-if_tp = (caddr_t)tp;
tp-tp_ioctl = ifp-if_ioctl;
ifp-if_ioctl = trunk_port_ioctl;
@@ -450,6 +456,7 @@ trunk_port_destroy(struct trunk_port *tp
 
ifp-if_watchdog = tp-tp_watchdog;
ifp-if_ioctl = tp-tp_ioctl;
+   ifp-if_output = tp-tp_output;
ifp-if_tp = NULL;
 
hook_disestablish(ifp-if_linkstatehooks, tp-lh_cookie);
@@ -563,6 +570,20 @@ trunk_port_ioctl(struct ifnet *ifp, u_lo
error = (*tp-tp_ioctl)(ifp, cmd, data);
 
return (error);
+}
+
+int
+trunk_port_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+struct rtentry *rt)
+{
+   /* restrict transmission on trunk members to bpf only */
+   if (ifp-if_type == IFT_IEEE8023ADLAG 
+   (m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL)) {
+   m_freem(m);
+   return (EBUSY);
+   }
+
+   return (ether_output(ifp, m, dst, rt));
 }
 
 void
Index: net/if_trunk.h
===
RCS file: /cvs/src/sys/net/if_trunk.h,v
retrieving revision 1.20
diff -u -p -r1.20 if_trunk.h
--- net/if_trunk.h  11 May 2015 08:41:43 -  1.20
+++ net/if_trunk.h  12 May 2015 11:28:39 -
@@ -141,6 +141,8 @@ struct trunk_port {
/* Redirected callbacks */
void(*tp_watchdog)(struct ifnet *);
int (*tp_ioctl)(struct ifnet *, u_long, caddr_t);
+   int (*tp_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
+   struct rtentry *);
 
SLIST_ENTRY(trunk_port) tp_entries;
 };



Re: Small carp(4) diff

2015-05-14 Thread Martin Pieuchot
On 14/05/15(Thu) 08:20, sven falempin wrote:
 On Thu, May 14, 2015 at 6:27 AM, Martin Pieuchot m...@openbsd.org wrote:
  one level of indentation.
 
 
 pre
 if  (X == null ) {dprintf(you broke this); return ;}
 unindented code;
 /pre
 
 Same gain, more dev ease and corner case check.

I disagree.  Adding such a check is painful because we have to maintain
more code to deal with the impossible, but now kind-of-safe, case
where ifp is NULL.  And what should happen once your CPU has returned
from this function?

Having a good NULL pointer dereference is a much better choice.



Small bridge(4) fix

2015-05-15 Thread Martin Pieuchot
If we change the rcvif pointer of a packet we need to run if_input()
again otherwise we might skip the handlers on the new interface.

Ultimately it would be nice to only assign rcvif in  if_input(), but
that's for another diff.

This fixes one case I left out in my previous conversion, ok?

Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.238
diff -u -p -r1.238 if_bridge.c
--- net/if_bridge.c 15 May 2015 10:15:13 -  1.238
+++ net/if_bridge.c 15 May 2015 10:50:32 -
@@ -1478,12 +1478,9 @@ bridge_dispatch(struct bridge_iflist *if
 
m-m_pkthdr.rcvif = ifl-ifp;
m-m_pkthdr.ph_rtableid = ifl-ifp-if_rdomain;
-   if (ifp-if_type == IFT_GIF) {
-   m-m_flags |= M_PROTO1;
-   ether_input_mbuf(ifl-ifp, m);
-   m = NULL;
-   }
-   return (m);
+   m-m_flags |= M_PROTO1;
+   ether_input_mbuf(ifl-ifp, m);
+   return (NULL);
}
if (bcmp(ac-ac_enaddr, eh-ether_shost, ETHER_ADDR_LEN) == 0
 #if NCARP  0



vlan+bridge fix

2015-05-15 Thread Martin Pieuchot
I have one setup with multiple interfaces in a bridge and on some of
these interfaces some vlan(4)s.  But there's currently a bug that
prevents us from sending (receive is fine) VLAN packets in such a config.
The diff below fixes that.

The problem is that vlan_output() does not pass its parent interface
to ether_output().  That's a mis-design that should be fixed later.
The reason for not passing the parent interface is that we want to
tcpdump(8) packets on vlan interfaces and the easiest hack^Wsolution
was to add a bpf handler in vlan_start()*.

Since my vlans are not part of the bridge, the check below is never
true and my packets never go through the bridge.  By moving this
check to if_output() we kill two birds with one diff.  First of
all we fix this vlan bug and secondly we simplify ether_output()
which in turn will allow us to fix all pseudo-interface *output()
functions.

One of the goals of if_output() is to move all bpf handlers into it
instead of having them in multiple if_start() functions.  Of course,
this will also help us remove the various #if PSEUDODRIVER from our stack...

Ok?

*: Note that for the exact same reason we cannot tcpdump output
packets on a carp(4) interface, this will be fixed at the same
time in upcoming diffs.


Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.198
diff -u -p -r1.198 if_ethersubr.c
--- net/if_ethersubr.c  15 May 2015 10:15:13 -  1.198
+++ net/if_ethersubr.c  15 May 2015 10:58:37 -
@@ -363,47 +363,6 @@ ether_output(struct ifnet *ifp0, struct 
if (ether_addheader(m, ifp, etype, esrc, edst) == -1)
senderr(ENOBUFS);
 
-#if NBRIDGE  0
-   /*
-* Interfaces that are bridgeports need special handling for output.
-*/
-   if (ifp-if_bridgeport) {
-   struct m_tag *mtag;
-
-   /*
-* Check if this packet has already been sent out through
-* this bridgeport, in which case we simply send it out
-* without further bridge processing.
-*/
-   for (mtag = m_tag_find(m, PACKET_TAG_BRIDGE, NULL); mtag;
-   mtag = m_tag_find(m, PACKET_TAG_BRIDGE, mtag)) {
-#ifdef DEBUG
-   /* Check that the information is there */
-   if (mtag-m_tag_len != sizeof(caddr_t)) {
-   error = EINVAL;
-   goto bad;
-   }
-#endif
-   if (!memcmp(ifp-if_bridgeport, mtag + 1,
-   sizeof(caddr_t)))
-   break;
-   }
-   if (mtag == NULL) {
-   /* Attach a tag so we can detect loops */
-   mtag = m_tag_get(PACKET_TAG_BRIDGE, sizeof(caddr_t),
-   M_NOWAIT);
-   if (mtag == NULL) {
-   error = ENOBUFS;
-   goto bad;
-   }
-   memcpy(mtag + 1, ifp-if_bridgeport, sizeof(caddr_t));
-   m_tag_prepend(m, mtag);
-   error = bridge_output(ifp, m, NULL, NULL);
-   return (error);
-   }
-   }
-#endif
-
len = m-m_pkthdr.len;
 
error = if_output(ifp, m);
Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.331
diff -u -p -r1.331 if.c
--- net/if.c15 May 2015 10:15:13 -  1.331
+++ net/if.c15 May 2015 10:58:37 -
@@ -450,6 +450,40 @@ if_output(struct ifnet *ifp, struct mbuf
length = m-m_pkthdr.len;
mflags = m-m_flags;
 
+#if NBRIDGE  0
+   /*
+* Interfaces that are bridgeports need special handling for output.
+*/
+   if (ifp-if_bridgeport) {
+   struct m_tag *mtag;
+
+   /*
+* Check if this packet has already been sent out through
+* this bridgeport, in which case we simply send it out
+* without further bridge processing.
+*/
+   for (mtag = m_tag_find(m, PACKET_TAG_BRIDGE, NULL); mtag;
+   mtag = m_tag_find(m, PACKET_TAG_BRIDGE, mtag)) {
+   if (!memcmp(ifp-if_bridgeport, mtag + 1,
+   sizeof(caddr_t)))
+   break;
+   }
+   if (mtag == NULL) {
+   /* Attach a tag so we can detect loops */
+   mtag = m_tag_get(PACKET_TAG_BRIDGE, sizeof(caddr_t),
+   M_NOWAIT);
+   if (mtag == NULL) {
+   m_freem(m);
+   return (ENOBUFS);
+   }
+   memcpy(mtag + 1, 

Re: Introducing if_output()

2015-05-12 Thread Martin Pieuchot
On 12/05/15(Tue) 12:15, Martin Pieuchot wrote:
 On 07/05/15(Thu) 11:50, Martin Pieuchot wrote:
  This diff is a first step towards removing all pseudo-driver #ifdef
  in ether_output().  As for ether_input() the goal of this work is to
  provide an elegant design to make it easier to turn pseudo-drivers
  MP-safe.
  
  So instead of including some bridge(4), vlan(4) and carp(4) specific
  code in ether_output(), I'd like to split this function and call the
  interesting chunks in bridge_output(), vlan_output() and carp_output().
  
  The first step is to take the generic code enqueuing packets in its
  own function: if_output().
  
  Sadly if_start() is still required for hfsc_deferred().
  
  Comments, ok?
 
 I got one positive test report involving carp, gif, vether  bridge but
 nothing else.

Rafael Zalamena pointed out a double if_opackets increment in vlan; the
diff below fixes that.  This version also leaves the net80211 bits out
because wireless interfaces do not play well with the IFXF_TXREADY hack...

 Anybody wants to comment or ok?

Index: sys/net/bridgestp.c
===
RCS file: /cvs/src/sys/net/bridgestp.c,v
retrieving revision 1.54
diff -u -p -r1.54 bridgestp.c
--- sys/net/bridgestp.c 12 May 2015 12:35:10 -  1.54
+++ sys/net/bridgestp.c 12 May 2015 12:40:47 -
@@ -353,7 +353,6 @@ bstp_transmit_tcn(struct bstp_state *bs,
struct ifnet *ifp = bp-bp_ifp;
struct ether_header *eh;
struct mbuf *m;
-   int s, len, error;
 
if (ifp == NULL || (ifp-if_flags  IFF_RUNNING) == 0)
return;
@@ -378,16 +377,8 @@ bstp_transmit_tcn(struct bstp_state *bs,
bpdu.tbu_bpdutype = BSTP_MSGTYPE_TCN;
bcopy(bpdu, mtod(m, caddr_t) + sizeof(*eh), sizeof(bpdu));
 
-   s = splnet();
bp-bp_txcount++;
-   len = m-m_pkthdr.len;
-   IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
-   if (error == 0) {
-   ifp-if_obytes += len;
-   ifp-if_omcasts++;
-   if_start(ifp);
-   }
-   splx(s);
+   if_output(ifp, m);
 }
 
 void
@@ -469,7 +460,7 @@ bstp_send_bpdu(struct bstp_state *bs, st
struct ifnet *ifp = bp-bp_ifp;
struct mbuf *m;
struct ether_header *eh;
-   int s, len, error;
+   int s;
 
s = splnet();
if (ifp == NULL || (ifp-if_flags  IFF_RUNNING) == 0)
@@ -517,13 +508,7 @@ bstp_send_bpdu(struct bstp_state *bs, st
m-m_pkthdr.pf.prio = BSTP_IFQ_PRIO;
 
bp-bp_txcount++;
-   len = m-m_pkthdr.len;
-   IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
-   if (error == 0) {
-   ifp-if_obytes += len;
-   ifp-if_omcasts++;
-   if_start(ifp);
-   }
+   if_output(ifp, m);
  done:
splx(s);
 }
Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.330
diff -u -p -r1.330 if.c
--- sys/net/if.c23 Apr 2015 09:45:24 -  1.330
+++ sys/net/if.c12 May 2015 12:40:47 -
@@ -441,6 +441,35 @@ if_start(struct ifnet *ifp)
}
 }
 
+int
+if_output(struct ifnet *ifp, struct mbuf *m)
+{
+   int s, error = 0;
+
+   s = splnet();
+
+   /*
+* Queue message on interface, and start output if interface
+* not yet active.
+*/
+   IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
+   if (error) {
+   splx(s);
+   return (error);
+   }
+
+   ifp-if_obytes += m-m_pkthdr.len;
+   if (m-m_flags  M_MCAST)
+   ifp-if_omcasts++;
+
+   ifp-if_opackets++;
+   if_start(ifp);
+
+   splx(s);
+
+   return (0);
+}
+
 struct mbuf_queue if_input_queue = MBUF_QUEUE_INITIALIZER(8192, IPL_NET);
 struct task if_input_task = TASK_INITIALIZER(if_input_process, 
if_input_queue);
 
Index: sys/net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.237
diff -u -p -r1.237 if_bridge.c
--- sys/net/if_bridge.c 7 May 2015 01:55:43 -   1.237
+++ sys/net/if_bridge.c 12 May 2015 12:40:48 -
@@ -2693,7 +2693,6 @@ int
 bridge_ifenqueue(struct bridge_softc *sc, struct ifnet *ifp, struct mbuf *m)
 {
int error, len;
-   short mflags;
 
 #if NGIF  0
/* Packet needs etherip encapsulation. */
@@ -2745,18 +2744,15 @@ bridge_ifenqueue(struct bridge_softc *sc
}
 #endif
len = m-m_pkthdr.len;
-   mflags = m-m_flags;
-   IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
+
+   error = if_output(ifp, m);
if (error) {
sc-sc_if.if_oerrors++;
return (error);
}
+
sc-sc_if.if_opackets++;
sc-sc_if.if_obytes += len;
-   ifp-if_obytes += len;
-   if (mflags  M_MCAST)
-   ifp-if_omcasts++;
-   if_start(ifp);
 
return (0);
 }
Index: sys/net/if_ethersubr.c

Small carp(4) diff

2015-05-14 Thread Martin Pieuchot
ifp can never be NULL in carp_set_ifp(), so remove the check and gain
one level of indentation.

ok?

Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.254
diff -u -p -r1.254 ip_carp.c
--- netinet/ip_carp.c   28 Apr 2015 14:51:50 -  1.254
+++ netinet/ip_carp.c   14 May 2015 10:18:52 -
@@ -1665,71 +1665,67 @@ carp_set_ifp(struct carp_softc *sc, stru
if (ifp == sc-sc_carpdev)
return (0);
 
-   if (ifp != NULL) {
-   if ((ifp-if_flags  IFF_MULTICAST) == 0)
-   return (EADDRNOTAVAIL);
+   if ((ifp-if_flags  IFF_MULTICAST) == 0)
+   return (EADDRNOTAVAIL);
 
-   if (ifp-if_type == IFT_CARP)
-   return (EINVAL);
-
-   if (ifp-if_carp == NULL) {
-   ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT|M_ZERO);
-   if (ncif == NULL)
-   return (ENOBUFS);
-   if ((error = ifpromisc(ifp, 1))) {
-   free(ncif, M_IFADDR, sizeof(*ncif));
-   return (error);
-   }
+   if (ifp-if_type == IFT_CARP)
+   return (EINVAL);
 
-   ncif-vhif_ifp = ifp;
-   TAILQ_INIT(ncif-vhif_vrs);
-   } else {
-   cif = (struct carp_if *)ifp-if_carp;
-   if (carp_check_dup_vhids(sc, cif, NULL))
-   return (EINVAL);
+   if (ifp-if_carp == NULL) {
+   ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT|M_ZERO);
+   if (ncif == NULL)
+   return (ENOBUFS);
+   if ((error = ifpromisc(ifp, 1))) {
+   free(ncif, M_IFADDR, sizeof(*ncif));
+   return (error);
}
 
-   /* detach from old interface */
-   if (sc-sc_carpdev != NULL)
-   carpdetach(sc);
-
-   /* attach carp interface to physical interface */
-   if (ncif != NULL)
-   ifp-if_carp = (caddr_t)ncif;
-   sc-sc_carpdev = ifp;
-   sc-sc_if.if_capabilities = ifp-if_capabilities 
-   IFCAP_CSUM_MASK;
+   ncif-vhif_ifp = ifp;
+   TAILQ_INIT(ncif-vhif_vrs);
+   } else {
cif = (struct carp_if *)ifp-if_carp;
-   TAILQ_FOREACH(vr, cif-vhif_vrs, sc_list) {
-   if (vr == sc)
-   myself = 1;
-   if (LIST_FIRST(vr-carp_vhosts)-vhid 
-   LIST_FIRST(sc-carp_vhosts)-vhid)
-   after = vr;
-   }
+   if (carp_check_dup_vhids(sc, cif, NULL))
+   return (EINVAL);
+   }
 
-   if (!myself) {
-   /* We're trying to keep things in order */
-   if (after == NULL) {
-   TAILQ_INSERT_TAIL(cif-vhif_vrs, sc, sc_list);
-   } else {
-   TAILQ_INSERT_AFTER(cif-vhif_vrs, after,
-   sc, sc_list);
-   }
-   cif-vhif_nvrs++;
-   }
-   if (sc-sc_naddrs || sc-sc_naddrs6)
-   sc-sc_if.if_flags |= IFF_UP;
-   carp_set_enaddr(sc);
-   s = splnet();
-   sc-lh_cookie = hook_establish(ifp-if_linkstatehooks, 1,
-   carp_carpdev_state, ifp);
-   carp_carpdev_state(ifp);
-   splx(s);
-   } else {
+   /* detach from old interface */
+   if (sc-sc_carpdev != NULL)
carpdetach(sc);
-   sc-sc_if.if_flags = ~(IFF_UP|IFF_RUNNING);
+
+   /* attach carp interface to physical interface */
+   if (ncif != NULL)
+   ifp-if_carp = (caddr_t)ncif;
+   sc-sc_carpdev = ifp;
+   sc-sc_if.if_capabilities = ifp-if_capabilities 
+   IFCAP_CSUM_MASK;
+   cif = (struct carp_if *)ifp-if_carp;
+   TAILQ_FOREACH(vr, cif-vhif_vrs, sc_list) {
+   if (vr == sc)
+   myself = 1;
+   if (LIST_FIRST(vr-carp_vhosts)-vhid 
+   LIST_FIRST(sc-carp_vhosts)-vhid)
+   after = vr;
}
+
+   if (!myself) {
+   /* We're trying to keep things in order */
+   if (after == NULL) {
+   TAILQ_INSERT_TAIL(cif-vhif_vrs, sc, sc_list);
+   } else {
+   TAILQ_INSERT_AFTER(cif-vhif_vrs, after,
+   sc, sc_list);
+   }
+   cif-vhif_nvrs++;
+   }
+   if (sc-sc_naddrs || sc-sc_naddrs6)
+   

Re: vlan+bridge fix

2015-05-19 Thread Martin Pieuchot
On 15/05/15(Fri) 17:34, mxb wrote:
 Diff is applied. So far no problems.
 Unfortunately I can’t test this fully - no vlans on my side.

Thanks for testing.  A no regression report is always welcome.

There are some more issues with bridge+vlan but jasper@ also confirmed
that this diff improves the situation.

Can I have oks?

  On 15 maj 2015, at 13:14, Martin Pieuchot m...@openbsd.org wrote:
  
  I have one setup with multiple interfaces in a bridge and on some of
  these interfaces some vlan(4)s.  But there's currently a bug that
  prevent us to send (receive is fine) VLAN packets in such config.
  Diff below fixes that.
  
  The problem is that vlan_output() does not pass its parent interface
  to ether_output().  That's a mis-design that should be fixed later.
  The reason for not passing the parent interface is that we want to
  tcpdump(8) packets on vlan interfaces and the easiest hack^Wsolution
  was to add a bpf handler in vlan_start()*.
  
  Since my vlans are not part of the bridge, the check below is never
  true and my packets never go through the bridge.  By moving this
  check to if_output() we kill two birds with one diff.  First of
  all we fix this vlan bug and secondly we simplify ether_output()
  which in turn will allow us to fix all pseudo-interface *output()
  functions.
  
  One of the goals of if_output() is to move all bpf handlers instead
  of having them in multiple if_start().  Of course, this will also
  help us removing the various #if PSEUDODRIVER from our stack...
  
  Ok?
  
  *: Note that for the exact same reason we cannot tcpdump output
  packets on a carp(4) interface, this will be fixed at the same
  time in upcoming diffs.
  
  
  Index: net/if_ethersubr.c
  ===
  RCS file: /cvs/src/sys/net/if_ethersubr.c,v
  retrieving revision 1.198
  diff -u -p -r1.198 if_ethersubr.c
  --- net/if_ethersubr.c  15 May 2015 10:15:13 -  1.198
  +++ net/if_ethersubr.c  15 May 2015 10:58:37 -
  @@ -363,47 +363,6 @@ ether_output(struct ifnet *ifp0, struct 
  if (ether_addheader(m, ifp, etype, esrc, edst) == -1)
  senderr(ENOBUFS);
  
  -#if NBRIDGE  0
  -   /*
  -* Interfaces that are bridgeports need special handling for output.
  -*/
  -   if (ifp-if_bridgeport) {
  -   struct m_tag *mtag;
  -
  -   /*
  -* Check if this packet has already been sent out through
  -* this bridgeport, in which case we simply send it out
  -* without further bridge processing.
  -*/
  -   for (mtag = m_tag_find(m, PACKET_TAG_BRIDGE, NULL); mtag;
  -   mtag = m_tag_find(m, PACKET_TAG_BRIDGE, mtag)) {
  -#ifdef DEBUG
  -   /* Check that the information is there */
  -   if (mtag-m_tag_len != sizeof(caddr_t)) {
  -   error = EINVAL;
  -   goto bad;
  -   }
  -#endif
  -   if (!memcmp(ifp-if_bridgeport, mtag + 1,
  -   sizeof(caddr_t)))
  -   break;
  -   }
  -   if (mtag == NULL) {
  -   /* Attach a tag so we can detect loops */
  -   mtag = m_tag_get(PACKET_TAG_BRIDGE, sizeof(caddr_t),
  -   M_NOWAIT);
  -   if (mtag == NULL) {
  -   error = ENOBUFS;
  -   goto bad;
  -   }
  -   memcpy(mtag + 1, ifp-if_bridgeport, sizeof(caddr_t));
  -   m_tag_prepend(m, mtag);
  -   error = bridge_output(ifp, m, NULL, NULL);
  -   return (error);
  -   }
  -   }
  -#endif
  -
  len = m-m_pkthdr.len;
  
  error = if_output(ifp, m);
  Index: net/if.c
  ===
  RCS file: /cvs/src/sys/net/if.c,v
  retrieving revision 1.331
  diff -u -p -r1.331 if.c
  --- net/if.c15 May 2015 10:15:13 -  1.331
  +++ net/if.c15 May 2015 10:58:37 -
  @@ -450,6 +450,40 @@ if_output(struct ifnet *ifp, struct mbuf
  length = m-m_pkthdr.len;
  mflags = m-m_flags;
  
  +#if NBRIDGE  0
  +   /*
  +* Interfaces that are bridgeports need special handling for output.
  +*/
  +   if (ifp-if_bridgeport) {
  +   struct m_tag *mtag;
  +
  +   /*
  +* Check if this packet has already been sent out through
  +* this bridgeport, in which case we simply send it out
  +* without further bridge processing.
  +*/
  +   for (mtag = m_tag_find(m, PACKET_TAG_BRIDGE, NULL); mtag;
  +   mtag = m_tag_find(m, PACKET_TAG_BRIDGE, mtag)) {
  +   if (!memcmp(ifp-if_bridgeport, mtag + 1,
  +   sizeof(caddr_t)))
  +   break;
  +   }
  +   if (mtag == NULL

Re: More if_output()

2015-05-19 Thread Martin Pieuchot
On 15/05/15(Fri) 15:53, Martin Pieuchot wrote:
 Some more if_output() conversion.  The xl bits are here because I'd
 like to reduce the number of places where IFQ_ENQUEUE() is used.
 
 After applying this diff you should only have a couple left.

Anyone?

 Ok?
 
 Index: dev/usb/if_upl.c
 ===
 RCS file: /cvs/src/sys/dev/usb/if_upl.c,v
 retrieving revision 1.64
 diff -u -p -r1.64 if_upl.c
 --- dev/usb/if_upl.c  10 Apr 2015 08:41:43 -  1.64
 +++ dev/usb/if_upl.c  15 May 2015 13:43:51 -
 @@ -888,28 +888,5 @@ int
  upl_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
  struct rtentry *rt0)
  {
 - int s, len, error;
 -
 - DPRINTFN(10,(%s: %s: enter\n,
 -  ((struct upl_softc *)ifp-if_softc)-sc_dev.dv_xname,
 -  __func__));
 -
 - len = m-m_pkthdr.len;
 - s = splnet();
 - /*
 -  * Queue message on interface, and start output if interface
 -  * not yet active.
 -  */
 - IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
 - if (error) {
 - /* mbuf is already freed */
 - splx(s);
 - return (error);
 - }
 - ifp-if_obytes += len;
 - if ((ifp-if_flags  IFF_OACTIVE) == 0)
 - (*ifp-if_start)(ifp);
 - splx(s);
 -
 - return (0);
 + return (if_output(ifp, m));
  }
 Index: dev/ic/xl.c
 ===
 RCS file: /cvs/src/sys/dev/ic/xl.c,v
 retrieving revision 1.123
 diff -u -p -r1.123 xl.c
 --- dev/ic/xl.c   24 Mar 2015 11:23:02 -  1.123
 +++ dev/ic/xl.c   15 May 2015 13:43:24 -
 @@ -177,9 +177,6 @@ int xl_list_tx_init_90xB(struct xl_softc
  void xl_wait(struct xl_softc *);
  void xl_mediacheck(struct xl_softc *);
  void xl_choose_xcvr(struct xl_softc *, int);
 -#ifdef notdef
 -void xl_testpacket(struct xl_softc *);
 -#endif
  
  int xl_miibus_readreg(struct device *, int, int);
  void xl_miibus_writereg(struct device *, int, int, int);
 @@ -659,35 +656,6 @@ xl_iff_905b(struct xl_softc *sc)
  
   XL_SEL_WIN(7);
  }
 -
 -#ifdef notdef
 -void
 -xl_testpacket(struct xl_softc *sc)
 -{
 - struct mbuf *m;
 - struct ifnet*ifp;
 - int error;
 -
 - ifp = sc-sc_arpcom.ac_if;
 -
 - MGETHDR(m, M_DONTWAIT, MT_DATA);
 -
 - if (m == NULL)
 - return;
 -
 - memcpy(mtod(m, struct ether_header *)-ether_dhost,
 - sc-sc_arpcom.ac_enaddr, ETHER_ADDR_LEN);
 - memcpy(mtod(m, struct ether_header *)-ether_shost,
 - sc-sc_arpcom.ac_enaddr, ETHER_ADDR_LEN);
 - mtod(m, struct ether_header *)-ether_type = htons(3);
 - mtod(m, unsigned char *)[14] = 0;
 - mtod(m, unsigned char *)[15] = 0;
 - mtod(m, unsigned char *)[16] = 0xE3;
 - m-m_len = m-m_pkthdr.len = sizeof(struct ether_header) + 3;
 - IFQ_ENQUEUE(ifp-if_snd, m, NULL, error);
 - xl_start(ifp);
 -}
 -#endif
  
  void
  xl_setcfg(struct xl_softc *sc)
 Index: net80211/ieee80211_input.c
 ===
 RCS file: /cvs/src/sys/net80211/ieee80211_input.c,v
 retrieving revision 1.133
 diff -u -p -r1.133 ieee80211_input.c
 --- net80211/ieee80211_input.c14 Mar 2015 03:38:51 -  1.133
 +++ net80211/ieee80211_input.c15 May 2015 13:43:51 -
 @@ -827,7 +827,6 @@ ieee80211_deliver_data(struct ieee80211c
   !(ic-ic_flags  IEEE80211_F_NOBRIDGE) 
   eh-ether_type != htons(ETHERTYPE_PAE)) {
   struct ieee80211_node *ni1;
 - int error, len;
  
   if (ETHER_IS_MULTICAST(eh-ether_dhost)) {
   m1 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
 @@ -843,18 +842,8 @@ ieee80211_deliver_data(struct ieee80211c
   m = NULL;
   }
   }
 - if (m1 != NULL) {
 - len = m1-m_pkthdr.len;
 - IFQ_ENQUEUE(ifp-if_snd, m1, NULL, error);
 - if (error)
 - ifp-if_oerrors++;
 - else {
 - if (m != NULL)
 - ifp-if_omcasts++;
 - ifp-if_obytes += len;
 - if_start(ifp);
 - }
 - }
 + if (m1 != NULL)
 + if_output(ifp, m1);
   }
  #endif
   if (m != NULL) {
 Index: net80211/ieee80211_output.c
 ===
 RCS file: /cvs/src/sys/net80211/ieee80211_output.c,v
 retrieving revision 1.94
 diff -u -p -r1.94 ieee80211_output.c
 --- net80211/ieee80211_output.c   14 Mar 2015 03:38:51 -  1.94
 +++ net80211/ieee80211_output.c   15 May 2015 13:43:51 -
 @@ -113,8 +113,7 @@ ieee80211_output(struct ifnet *ifp, stru
  {
   struct ieee80211_frame *wh;
   struct m_tag

Re: xhci, thinkpad usb ports, uhub0: device problem

2015-04-14 Thread Martin Pieuchot
On 13/04/15(Mon) 22:39, Rafal Brodewicz wrote:
 Hi tech@
 
 Enabling usb3 support in my ThinkPad T440s causes dock station usb ports
 to stop working. I see uhub0: device problem, disabling port 3 message
 in dmesg.
 dmesg with usb3 on and off included.

It seems that the hub in your dock fails to attach.  Could you build a
kernel with XHCI_DEBUG and send me the dmesg?

 OpenBSD 5.7-current (GENERIC.MP) #4: Fri Apr 10 21:36:33 CEST 2015
 r...@current.bro:/usr/src/sys/arch/amd64/compile/GENERIC.MP
 real mem = 12539600896 (11958MB)
 avail mem = 12155719680 (11592MB)
 mpath0 at root
 scsibus0 at mpath0: 256 targets
 mainbus0 at root
 bios0 at mainbus0: SMBIOS rev. 2.7 @ 0xbcc0d000 (61 entries)
 bios0: vendor LENOVO version GJET82WW (2.32 ) date 01/09/2015
 bios0: LENOVO 20ARA03LPB
 acpi0 at bios0: rev 2
 acpi0: sleep states S0 S3 S4 S5
 acpi0: tables DSDT FACP SLIC DBGP ECDT HPET APIC MCFG SSDT SSDT SSDT SSDT 
 SSDT SSDT SSDT SSDT PCCT SSDT TCPA UEFI MSDM ASF! BATB FPDT UEFI SSDT DMAR
 acpi0: wakeup devices LID_(S4) SLPB(S3) IGBE(S4) EXP2(S4) XHCI(S3) EHC1(S3)
 acpitimer0 at acpi0: 3579545 Hz, 24 bits
 acpiec0 at acpi0
 acpihpet0 at acpi0: 14318179 Hz
 acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
 cpu0 at mainbus0: apid 0 (boot processor)
 cpu0: Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz, 1796.11 MHz
 cpu0: 
 FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,LONG,LAHF,ABM,PERF,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID
 cpu0: 256KB 64b/line 8-way L2 cache
 cpu0: smt 0, core 0, package 0
 mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
 cpu0: apic clock running at 99MHz
 cpu0: mwait min=64, max=64, C-substates=0.2.1.2.4, IBE
 cpu1 at mainbus0: apid 1 (application processor)
 cpu1: Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz, 1795.85 MHz
 cpu1: 
 FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,LONG,LAHF,ABM,PERF,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID
 cpu1: 256KB 64b/line 8-way L2 cache
 cpu1: smt 1, core 0, package 0
 cpu2 at mainbus0: apid 2 (application processor)
 cpu2: Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz, 1795.85 MHz
 cpu2: 
 FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,LONG,LAHF,ABM,PERF,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID
 cpu2: 256KB 64b/line 8-way L2 cache
 cpu2: smt 0, core 1, package 0
 cpu3 at mainbus0: apid 3 (application processor)
 cpu3: Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz, 1795.85 MHz
 cpu3: 
 FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,LONG,LAHF,ABM,PERF,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID
 cpu3: 256KB 64b/line 8-way L2 cache
 cpu3: smt 1, core 1, package 0
 ioapic0 at mainbus0: apid 2 pa 0xfec0, version 20, 40 pins
 acpimcfg0 at acpi0 addr 0xf800, bus 0-63
 acpiprt0 at acpi0: bus 0 (PCI0)
 acpiprt1 at acpi0: bus -1 (PEG_)
 acpiprt2 at acpi0: bus 2 (EXP1)
 acpiprt3 at acpi0: bus 3 (EXP2)
 acpiprt4 at acpi0: bus -1 (EXP3)
 acpicpu0 at acpi0: C3, C1, PSS
 acpicpu1 at acpi0: C3, C1, PSS
 acpicpu2 at acpi0: C3, C1, PSS
 acpicpu3 at acpi0: C3, C1, PSS
 acpipwrres0 at acpi0: PUBS, resource for XHCI, EHC1
 acpipwrres1 at acpi0: NVP3, resource for PEG_
 acpipwrres2 at acpi0: NVP2, resource for PEG_
 acpitz0 at acpi0: critical temperature is 200 degC
 acpibtn0 at acpi0: LID_
 acpibtn1 at acpi0: SLPB
 acpibat0 at acpi0: BAT0 model 45N1773 serial 29680 type LION oem SANYO
 acpibat1 at acpi0: BAT1 model 45N1127 serial 11779 type LION oem LGC
 acpiac0 at acpi0: AC unit online
 acpithinkpad0 at acpi0
 cpu0: Enhanced SpeedStep 1796 MHz: speeds: 2501, 2500, 2400, 2200, 2100, 
 1900, 1800, 1700, 1600, 1500, 1300, 1200, 1100, 1000, 800, 775 MHz
 pci0 at mainbus0 bus 0
 pchb0 at pci0 dev 0 function 0 Intel Core 4G Host rev 0x0b
 vga1 at pci0 dev 2 function 0 Intel HD Graphics rev 0x0b
 intagp at vga1 not configured
 inteldrm0 at vga1
 drm0 at inteldrm0
 error: [drm:pid0:i915_write32] *ERROR* Unknown unclaimed register before 
 writing to 10
 error: [drm:pid0:intel_dp_set_link_train] *ERROR* Timed out waiting for DP 
 idle patterns
 error: [drm:pid0:i915_write32] *ERROR* Unknown unclaimed register before 
 writing to 64040
 

Re: [PATCH] Add support for dumping descriptors to usbdevs(8)

2015-04-16 Thread Martin Pieuchot
On 16/04/15(Thu) 10:02, Dimitris Papastamos wrote:
 ping

A bit too soon to ping ;)  You could wait at least a week!



Re: [NEW] Driver for the Araneus Alea II USB TRNG

2015-04-16 Thread Martin Pieuchot
On 15/04/15(Wed) 10:46, attila wrote:
 Martin Pieuchot m...@openbsd.org writes:
 
  On 14/04/15(Tue) 15:22, attila wrote:
  Martin Pieuchot m...@openbsd.org writes:
   
   static const struct usb_devno ualea_devs[] = {
   { USB_VENDOR_ARANEUS,   USB_PRODUCT_ARANEUS_ALEA }
   };
  
   Is it possible to match your device based on the content of the device
   descriptor instead of whitelisting IDs?  Whitelisting means that if the
   company produce a compatible device with a new ID we'll need to modify
   the driver.
  
  
  Sadly, I don't think it is possible... you mean by looking at
  bDeviceClass/bDeviceSubClass/bDeviceProtocol?  He only gives me zeroes
  [...]
  Perhaps I am misunderstanding; is there something else in there I
  could/should match on?  I've changed the attach routine in the updated
  version to check vendor/product/iface, at least.
 
  I was thinking at bInterfaceClass and bInterfaceProtocol but when they
  are both to vendor defined (255), matching against the ID is the right
  thing to do. 
 
  I looked and it appears that M_ZERO is used when allocating the memory
  for all of these structures, so I take it that explicit zeroing of
  things when tearing down is neither required nor desired?  I removed
  this kind of thing from ualea.c because it looked like it wasn't
  necessary.
 
  That's right.
 
  I'm attaching the updated driver.
  
  Thank you for the critique.  I suppose I need to write a man page for
  this as well... working on it.
 
  Perfect, I'll put it in tree together with your driver :)
 
 
 Man page attached.  I am also attaching an updated copy of the driver
 (only copyright changed).
 
 Thanks so much for your help.  I look forward to contributing more.

Committed, thanks.  I removed some unneeded headers and applied jmc@'s
tweak.

In the future I'd suggest you send a unified diff against the CVS tree;
it's easier to apply & test!

Cheers,
Martin



Re: [PATCH] Add support for dumping descriptors to usbdevs(8)

2015-04-16 Thread Martin Pieuchot
On 13/04/15(Mon) 17:49, Dimitris Papastamos wrote:
 Hi,
 
 Attached below an updated patch that makes the layout a bit more sensible.

It's nice it works. 

Now like various students that applied for the GSoC you've done the
first step.  The interesting part of that GSoC is also the boring part:
the parser.  How are you gonna deal with AUDIO, VIDEO and HID descriptors?

I'm asking the question but I'm not really interested in the answer.  I
don't see the point of having multiple people working on the same task.
But if a student is selected for this task, you might want to discuss
that with him.

Honestly if you are looking for an easy task to contribute to the USB
land in OpenBSD you could have asked before, there's a lot to do :)

So here's a small task, just for you:  can you add an option to
usbdevs(8) to enable/disable USB bus probing?   By default it would be
on, just like now: when you plug a device in, it is detected/attached.

If you turn it off, no new device will be attached and port events
will simply be acknowledged.



Re: [NEW] Driver for the Araneus Alea II USB TRNG

2015-04-17 Thread Martin Pieuchot
On 16/04/15(Thu) 16:06, attila wrote:
 [...] 
 After this was committed I received a critique of the driver from the
 person behind the Alea II (Andreas Gustafsson) who made a few pretty
 good points.  He felt trying to pull all the entropy off of the device
 that would theoretically be available every second was a losing
 strategy for several reasons.  Most importantly I ended up calling
 add_true_randomness() in bursts of 3200 calls every trip through
 ualea_task() whereas rnd_event_space[] in rnd.c only has 64 entries on
 a 32bit machine (42 on amd64); this almost surely means that the vast
 majority of my calls are no-ops... not so useful, it appears.
 
 The attached diff cranks the buffer size way down and now we call
 add_true_randomness() 32 times every 100ms.  When I crank ALEA_MSECS
 below 100ms I start to notice the load increases on the machine with
 an Alea II plugged in.  I guess this is because the stuff that happens
 in ualea_task() happens in the context of a process and that process
 always appears to be runnable when ALEA_MSECS is e.g. 10ms.  I crank
 the read timeout up to 5000ms because that's what he recommends in his
 sample code; under normal circumstances we never time out.
 
 The diff also explicitly looks for endpoint #1 because that's the
 endpoint that Andreas says to use, not necc. the first one that I
 find; as it turns out now the first one I find is the right one but
 just to be safe it's better to check explicitly.
 
 Maybe now this is closer to production-worthy.  All feedback and
 comments most welcome.

Applied, thanks.

Just one note, I had to apply your diff by hand because your MUA changes
tabs into spaces.  If you can change that for the next diff that would
be great!



Re: File protection, second attempt

2015-04-12 Thread Martin Pieuchot
Hello,

This is an interesting diff with a lot of stuff in it.  I'd suggest
you give more details about what you're trying to achieve with it
and split it into small parts.

It's nice to see some work done in this area, but do not rush; you'd be
surprised by the dragons in there.

On 12/04/15(Sun) 15:19, Kanonenvogel wrote:
 This is the second attempt of struct file protection.

Can you explain what needs to be protected from what?

 1. Filelist, nfiles and operations with them are protected by rwlock.

Why not, could you describe why they need a lock?  A diff just to do
that would be much easier to review.

 2. Garbage collector's flags FIF_MARK and FIF_DEFER moved from f_iflags to
 new field f_gc_flags (compatibility with pstat was kept).

Why did you decide to split f_iflags in two?  What's the problem this
move is solving?  Here too a diff just to do that would be much easier
to review.

 3. Operations over f_iflags are atomic. FIF_LARVAL set only once and
 FIF_HASLOCK isn't set too frequent.

Why are you using atomic operations for these flags?  Which scenario
would that help?   Did you consider any alternative?

 4. Set/unset operations over f_flag are atomic. They are not too frequent.

Does that mean that as soon as the atomic operation has completed all the
conditions that a flag represents are true/false?  Since the integrity of
the code you're changing is currently protected by the biglock the order
of the operations inside the functions does not really matter.   Setting
the flag atomically is in general not enough and since you're not
giving more details it's hard to dig into a huge diff ;)

 5. Operations over f_count are atomic. Surely those are frequent though they
 can be non atomic on uniprocessor kernel.

Why do they need to be atomic and also why are you doing a dance with
the file_lock when you cannot increment the counter?

 6. f_offset field is not protectd now, it should be protected later.

From what should it be protected?

 7. Counters are not protected now, they should be protected later.

Same question :)

Since I've been lurking in that area too recently, you'll find 5
refactoring diffs attached to this email.  It's easier for me to
send you the work that I did rather than comment on some things.

One more question, did you consider the fact that the code you're
modifying might contain some bugs which are currently not exposed
because most of it is run under the KERNEL_LOCK?
From d7b381d438aa98a80e66c4aceeb905ef0a3ab9f3 Mon Sep 17 00:00:00 2001
From: Martin Pieuchot m...@openbsd.org
Date: Mon, 2 Mar 2015 16:33:04 +0100
Subject: [PATCH 1/5] Check for flags inside fd_getfile(), will help for
 upcoming locking.

---
 sys/arch/i386/i386/linux_machdep.c |  2 +-
 sys/compat/linux/linux_blkio.c |  2 +-
 sys/compat/linux/linux_cdrom.c |  7 +--
 sys/compat/linux/linux_fdio.c  |  2 +-
 sys/compat/linux/linux_file.c  |  4 ++--
 sys/compat/linux/linux_hdio.c  |  2 +-
 sys/compat/linux/linux_socket.c|  2 +-
 sys/compat/linux/linux_termios.c   |  7 +--
 sys/compat/ossaudio/ossaudio.c | 19 +++
 sys/dev/systrace.c |  2 +-
 sys/kern/kern_descrip.c| 23 +--
 sys/kern/kern_event.c  |  4 ++--
 sys/kern/kern_exec.c   |  2 +-
 sys/kern/sys_generic.c | 27 +++
 sys/kern/uipc_syscalls.c   |  2 +-
 sys/kern/uipc_usrreq.c |  2 +-
 sys/kern/vfs_lookup.c  |  2 +-
 sys/kern/vfs_syscalls.c| 22 +++---
 sys/miscfs/fuse/fuse_vfsops.c  |  2 +-
 sys/sys/filedesc.h |  2 +-
 sys/uvm/uvm_mmap.c |  2 +-
 21 files changed, 49 insertions(+), 90 deletions(-)

diff --git sys/arch/i386/i386/linux_machdep.c sys/arch/i386/i386/linux_machdep.c
index 3ada48c..e5ee44e 100644
--- sys/arch/i386/i386/linux_machdep.c
+++ sys/arch/i386/i386/linux_machdep.c
@@ -440,7 +440,7 @@ linux_machdepioctl(struct proc *p, void *v, register_t 
*retval)
com = SCARG(uap, com);
 
fdp = p-p_fd;
-   if ((fp = fd_getfile(fdp, fd)) == NULL)
+   if ((fp = fd_getfile(fdp, fd, 0)) == NULL)
return (EBADF);
 
switch (com) {
diff --git sys/compat/linux/linux_blkio.c sys/compat/linux/linux_blkio.c
index 287ae3c..860b1af 100644
--- sys/compat/linux/linux_blkio.c
+++ sys/compat/linux/linux_blkio.c
@@ -70,7 +70,7 @@ linux_ioctl_blkio(struct proc *p, struct linux_sys_ioctl_args 
*uap,
struct disklabel label;
 
 fdp = p-p_fd;
-   if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
+   if ((fp = fd_getfile(fdp, SCARG(uap, fd), 0)) == NULL)
return (EBADF);
FREF(fp);
error = 0;
diff --git sys/compat/linux/linux_cdrom.c sys/compat/linux/linux_cdrom.c
index b1ca8e1..b00fdfc 100644
--- sys/compat/linux/linux_cdrom.c
+++ sys/compat/linux/linux_cdrom.c
@@ -108,15 +108,10 @@ linux_ioctl_cdrom(p, v, retval

Re: [NEW] Driver for the Araneus Alea II USB TRNG

2015-04-15 Thread Martin Pieuchot
On 14/04/15(Tue) 15:22, attila wrote:
 Martin Pieuchot m...@openbsd.org writes:
  
  static const struct usb_devno ualea_devs[] = {
 { USB_VENDOR_ARANEUS,   USB_PRODUCT_ARANEUS_ALEA }
  };
 
  Is it possible to match your device based on the content of the device
  descriptor instead of whitelisting IDs?  Whitelisting means that if the
  company produce a compatible device with a new ID we'll need to modify
  the driver.
 
 
 Sadly, I don't think it is possible... you mean by looking at
 bDeviceClass/bDeviceSubClass/bDeviceProtocol?  He only gives me zeroes
 [...]
 Perhaps I am misunderstanding; is there something else in there I
 could/should match on?  I've changed the attach routine in the updated
 version to check vendor/product/iface, at least.

I was thinking of bInterfaceClass and bInterfaceProtocol but when they
are both set to vendor defined (255), matching against the ID is the right
thing to do.

 I looked and it appears that M_ZERO is used when allocating the memory
 for all of these structures, so I take it that explicit zeroing of
 things when tearing down is neither required nor desired?  I removed
 this kind of thing from ualea.c because it looked like it wasn't
 necessary.

That's right.

 I'm attaching the updated driver.
 
 Thank you for the critique.  I suppose I need to write a man page for
 this as well... working on it.

Perfect, I'll put it in tree together with your driver :)

 
 Pax, -A
 --
 att...@stalphonsos.com | http://trac.haqistan.net/~attila
 keyid E6CC1EDB | 4D91 1B98 A210 1D71 2A0E  AC29 9677 D0A6 E6CC 1EDB
 
 

 /*$OpenBSD$ */
 /*
  * Copyright (c) 2006 Alexander Yurchenko gra...@openbsd.org
  * Copyright (c) 2007 Marc Balmer mbal...@openbsd.org
  * Copyright (C) 2015 attila att...@stalphonsos.com
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED AS IS AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
 /*
  * Alea II TRNG.  Produces 100kbit/sec of entropy by black magic
  *
  * Product information in English can be found here:
  * http://www.araneus.fi/products/alea2/en/
  */
 
 #include sys/param.h
 #include sys/systm.h
 #include sys/device.h
 #include sys/kernel.h
 #include sys/timeout.h
 #include dev/usb/usb.h
 #include dev/usb/usbdevs.h
 #include dev/usb/usbdi.h
 #include dev/usb/usbdi_util.h
 #include dev/rndvar.h
 
 #define ALEA_IFACE0
 #define ALEA_MSECS100
 #define ALEA_READ_TIMEOUT 1000
 #define ALEA_BUFSIZ   ((1024/8)*100)  /* 100 kbits */
 
 #define DEVNAME(_sc) ((_sc)-sc_dev.dv_xname)
 
 struct ualea_softc {
   struct  device sc_dev;
   struct  usbd_device *sc_udev;
   struct  usbd_pipe *sc_pipe;
   struct  timeout sc_timeout;
   struct  usb_task sc_task;
   struct  usbd_xfer *sc_xfer;
   int *sc_buf;
 };
 
 int ualea_match(struct device *, void *, void *);
 void ualea_attach(struct device *, struct device *, void *);
 int ualea_detach(struct device *, int);
 void ualea_task(void *);
 void ualea_timeout(void *);
 
 struct cfdriver ualea_cd = {
   NULL, ualea, DV_DULL
 };
 
 const struct cfattach ualea_ca = {
   sizeof(struct ualea_softc),
   ualea_match,
   ualea_attach,
   ualea_detach
 };
 
 int
 ualea_match(struct device *parent, void *match, void *aux)
 {
   struct usb_attach_arg *uaa = aux;
 
   if (uaa-iface == NULL)
   return (UMATCH_NONE);
   if ((uaa-vendor == USB_VENDOR_ARANEUS) 
   (uaa-product == USB_PRODUCT_ARANEUS_ALEA) 
   (uaa-ifaceno == ALEA_IFACE))
   return (UMATCH_VENDOR_PRODUCT);
   return (UMATCH_NONE);
 }
 
 void
 ualea_attach(struct device *parent, struct device *self, void *aux)
 {
   struct ualea_softc *sc = (struct ualea_softc *)self;
   struct usb_attach_arg *uaa = aux;
   usb_interface_descriptor_t *id;
   usb_endpoint_descriptor_t *ed;
   int ep_ibulk = -1;
   usbd_status error;
   int i;
 
   sc-sc_udev = uaa-device;
   id = usbd_get_interface_descriptor(uaa-iface);
   for (i = 0; i  id-bNumEndpoints; i++) {
   ed = usbd_interface2endpoint_descriptor(uaa-iface, i);
   if (ed == NULL) {
   printf(%s: failed to get endpoint %d descriptor\n,
   DEVNAME(sc), i);
   return;
   }
   if (UE_GET_DIR(ed

bce(4), lge(4), nge(4), vge(4), wb(4), malo(4) and hme(4/sparc)

2015-04-07 Thread Martin Pieuchot
Here's a diff to convert these drivers to if_input().  They all make
use of m_devget(9) which takes an ifp as argument and I'd like to
change that.  But first I need to make sure all these drivers are
converted.

If you can try this diff on one or more chips, please let me know how
it goes.

Martin

Index: arch/sparc/dev/hme.c
===
RCS file: /cvs/src/sys/arch/sparc/dev/hme.c,v
retrieving revision 1.67
diff -u -p -r1.67 hme.c
--- arch/sparc/dev/hme.c22 Dec 2014 02:26:54 -  1.67
+++ arch/sparc/dev/hme.c7 Apr 2015 15:16:37 -
@@ -815,6 +815,7 @@ hme_read(sc, idx, len, flags)
u_int32_t flags;
 {
struct ifnet *ifp = sc-sc_arpcom.ac_if;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
 
if (len = sizeof(struct ether_header) ||
@@ -835,16 +836,8 @@ hme_read(sc, idx, len, flags)
 
ifp-if_ipackets++;
 
-#if NBPFILTER  0
-   /*
-* Check if there's a BPF listener on this interface.
-* If so, hand off the raw packet to BPF.
-*/
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-   /* Pass the packet up. */
-   ether_input_mbuf(ifp, m);
+   ml_enqueue(ml, m);
+   if_input(ifp, ml);
 }
 
 void
Index: dev/ic/mtd8xx.c
===
RCS file: /cvs/src/sys/dev/ic/mtd8xx.c,v
retrieving revision 1.24
diff -u -p -r1.24 mtd8xx.c
--- dev/ic/mtd8xx.c 22 Dec 2014 02:28:51 -  1.24
+++ dev/ic/mtd8xx.c 7 Apr 2015 15:24:43 -
@@ -874,6 +874,7 @@ mtd_intr(void *xsc)
 static void
 mtd_rxeof(struct mtd_softc *sc)
 {
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
struct ifnet *ifp;
struct mtd_rx_desc *cur_rx;
@@ -934,12 +935,10 @@ mtd_rxeof(struct mtd_softc *sc)
 
ifp-if_ipackets++;
 
-#if NBPFILTER  0
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-   ether_input_mbuf(ifp, m);
+   ml_enqueue(ml, m);
}
+
+   if_input(ifp, ml);
 
sc-mtd_cdata.mtd_rx_prod = i;
 }
Index: dev/pci/if_bce.c
===
RCS file: /cvs/src/sys/dev/pci/if_bce.c,v
retrieving revision 1.43
diff -u -p -r1.43 if_bce.c
--- dev/pci/if_bce.c14 Mar 2015 03:38:48 -  1.43
+++ dev/pci/if_bce.c7 Apr 2015 15:25:44 -
@@ -694,6 +694,7 @@ void
 bce_rxintr(struct bce_softc *sc)
 {
struct ifnet *ifp = sc-bce_ac.ac_if;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct rx_pph *pph;
struct mbuf *m;
int curr;
@@ -741,17 +742,7 @@ bce_rxintr(struct bce_softc *sc)
BCE_PREPKT_HEADER_SIZE, len, ETHER_ALIGN, ifp);
ifp-if_ipackets++;
 
-#if NBPFILTER  0
-   /*
-* Pass this up to any BPF listeners, but only
-* pass it up the stack if it's for us.
-*/
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
-   /* Pass it on. */
-   ether_input_mbuf(ifp, m);
+   ml_enqueue(ml, m);
 
/* re-check current in case it changed */
curr = (bus_space_read_4(sc-bce_btag, sc-bce_bhandle,
@@ -760,6 +751,9 @@ bce_rxintr(struct bce_softc *sc)
if (curr = BCE_NRXDESC)
curr = BCE_NRXDESC - 1;
}
+
+   if_input(ifp, ml);
+
sc-bce_rxin = curr;
 }
 
Index: dev/pci/if_lge.c
===
RCS file: /cvs/src/sys/dev/pci/if_lge.c,v
retrieving revision 1.64
diff -u -p -r1.64 if_lge.c
--- dev/pci/if_lge.c22 Dec 2014 02:28:52 -  1.64
+++ dev/pci/if_lge.c7 Apr 2015 15:27:14 -
@@ -680,6 +680,7 @@ lge_newbuf(struct lge_softc *sc, struct 
 void
 lge_rxeof(struct lge_softc *sc, int cnt)
 {
+   struct mbuf_listml = MBUF_LIST_INITIALIZER();
 struct mbuf*m;
 struct ifnet   *ifp;
struct lge_rx_desc  *cur_rx;
@@ -727,20 +728,11 @@ lge_rxeof(struct lge_softc *sc, int cnt)
}
m = m0;
} else {
-   m-m_pkthdr.rcvif = ifp;
m-m_pkthdr.len = m-m_len = total_len;
}
 
ifp-if_ipackets++;
 
-#if NBPFILTER  0
-   /*
-* Handle BPF listeners. Let the BPF user see the packet.
-*/
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-
/* Do IP checksum checking. */
if (rxsts  LGE_RXSTS_ISIP) {
if (!(rxsts  LGE_RXSTS_IPCSUMERR))
@@ -755,8 +747,10 @@ lge_rxeof(struct 

Re: bce(4), lge(4), nge(4), vge(4), wb(4), malo(4) and hme(4/sparc)

2015-04-08 Thread Martin Pieuchot
On 08/04/15(Wed) 10:03, David Gwynne wrote:
 
  On 8 Apr 2015, at 01:38, Martin Pieuchot m...@openbsd.org wrote:
  
  Here's a diff to convert these drivers to if_input().  They all make
  use of m_devget(9) which takes an ifp as argument and I'd like to
  change that.  But first I need to make sure all these drivers are
  converted.
  [...]
  Index: dev/ic/mtd8xx.c
  ===
  RCS file: /cvs/src/sys/dev/ic/mtd8xx.c,v
  retrieving revision 1.24
  diff -u -p -r1.24 mtd8xx.c
  --- dev/ic/mtd8xx.c 22 Dec 2014 02:28:51 -  1.24
  +++ dev/ic/mtd8xx.c 7 Apr 2015 15:24:43 -
  @@ -874,6 +874,7 @@ mtd_intr(void *xsc)
  static void
  mtd_rxeof(struct mtd_softc *sc)
  {
  +   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
  struct mbuf *m;
  struct ifnet *ifp;
  struct mtd_rx_desc *cur_rx;
  @@ -934,12 +935,10 @@ mtd_rxeof(struct mtd_softc *sc)
  
  ifp-if_ipackets++;
  
  -#if NBPFILTER  0
  -   if (ifp-if_bpf)
  -   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
  -#endif
  -   ether_input_mbuf(ifp, m);
  +   ml_enqueue(ml, m);
  }
  +
  +   if_input(ifp, ml);
  
  sc-mtd_cdata.mtd_rx_prod = i;
  }
 
 no.
 
 there's a return inside the loop in this driver, which means you'll leak the 
 mbufs on ml. if you use break instead of return it should be ok.

Like that?

Index: dev/ic/mtd8xx.c
===
RCS file: /cvs/src/sys/dev/ic/mtd8xx.c,v
retrieving revision 1.24
diff -u -p -r1.24 mtd8xx.c
--- dev/ic/mtd8xx.c 22 Dec 2014 02:28:51 -  1.24
+++ dev/ic/mtd8xx.c 8 Apr 2015 10:08:37 -
@@ -874,6 +874,7 @@ mtd_intr(void *xsc)
 static void
 mtd_rxeof(struct mtd_softc *sc)
 {
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct mbuf *m;
struct ifnet *ifp;
struct mtd_rx_desc *cur_rx;
@@ -912,7 +913,7 @@ mtd_rxeof(struct mtd_softc *sc)
continue;
} else {
mtd_init(ifp);
-   return;
+   break;
}
}
 
@@ -934,12 +935,10 @@ mtd_rxeof(struct mtd_softc *sc)
 
ifp-if_ipackets++;
 
-#if NBPFILTER  0
-   if (ifp-if_bpf)
-   bpf_mtap(ifp-if_bpf, m, BPF_DIRECTION_IN);
-#endif
-   ether_input_mbuf(ifp, m);
+   ml_enqueue(ml, m);
}
+
+   if_input(ifp, ml);
 
sc-mtd_cdata.mtd_rx_prod = i;
 }



Re: rename gettdbbyaddr to gettdbbydst

2015-04-08 Thread Martin Pieuchot
On 08/04/15(Wed) 12:15, Mike Belopuhov wrote:
 OK to rename gettdbbyaddr to gettdbbydst since that's what it does
 and it aligns well with gettdbbysrc?

Makes sense to me.  This is also consistent with the comments in ip_ipsp.h.

One comment though, you're moving a splsoftnet() in tdb_delete() but it
seems that the hash function does not need this protection, or am I
misunderstanding something? 

 ---
  sys/netinet/ip_ipsp.c | 47 +--
  sys/netinet/ip_ipsp.h |  4 ++--
  sys/netinet/ip_spd.c  |  6 +++---
  3 files changed, 30 insertions(+), 27 deletions(-)
 
 diff --git sys/netinet/ip_ipsp.c sys/netinet/ip_ipsp.c
 index f6e598f..edeabc8 100644
 --- sys/netinet/ip_ipsp.c
 +++ sys/netinet/ip_ipsp.c
 @@ -118,21 +118,21 @@ struct xformsw xformsw[] = {
 tcp_signature_tdb_zeroize,tcp_signature_tdb_input,
 tcp_signature_tdb_output, }
  #endif /* TCP_SIGNATURE */
  };
  
  struct xformsw *xformswNXFORMSW = xformsw[nitems(xformsw)];
  
  #define  TDB_HASHSIZE_INIT   32
  
  static struct tdb **tdbh = NULL;
 -static struct tdb **tdbaddr = NULL;
 +static struct tdb **tdbdst = NULL;
  static struct tdb **tdbsrc = NULL;
  static u_int tdb_hashmask = TDB_HASHSIZE_INIT - 1;
  static int tdb_count;
  
  /*
   * Our hashing function needs to stir things with a non-zero random 
 multiplier
   * so we cannot be DoS-attacked via choosing of the data to hash.
   */
  int
  tdb_hash(u_int rdomain, u_int32_t spi, union sockaddr_union *dst,
 @@ -393,34 +393,34 @@ ipsp_aux_match(struct tdb *tdb,
   }
  
   return 1;
  }
  
  /*
   * Get an SA given the remote address, the security protocol type, and
   * the desired IDs.
   */
  struct tdb *
 -gettdbbyaddr(u_int rdomain, union sockaddr_union *dst, u_int8_t sproto,
 +gettdbbydst(u_int rdomain, union sockaddr_union *dst, u_int8_t sproto,
  struct ipsec_ref *srcid, struct ipsec_ref *dstid,
  struct ipsec_ref *local_cred, struct sockaddr_encap *filter,
  struct sockaddr_encap *filtermask)
  {
   u_int32_t hashval;
   struct tdb *tdbp;
  
 - if (tdbaddr == NULL)
 + if (tdbdst == NULL)
   return (struct tdb *) NULL;
  
   hashval = tdb_hash(rdomain, 0, dst, sproto);
  
 - for (tdbp = tdbaddr[hashval]; tdbp != NULL; tdbp = tdbp-tdb_anext)
 + for (tdbp = tdbdst[hashval]; tdbp != NULL; tdbp = tdbp-tdb_dnext)
   if ((tdbp-tdb_sproto == sproto) 
   (tdbp-tdb_rdomain == rdomain) 
   ((tdbp-tdb_flags  TDBF_INVALID) == 0) 
   (!memcmp(tdbp-tdb_dst, dst, SA_LEN(dst-sa {
   /* Do IDs and local credentials match ? */
   if (!ipsp_aux_match(tdbp, srcid, dstid,
   local_cred, NULL, filter, filtermask))
   continue;
   break;
   }
 @@ -576,85 +576,85 @@ tdb_soft_firstuse(void *v)
   pfkeyv2_expire(tdb, SADB_EXT_LIFETIME_SOFT);
   tdb-tdb_flags = ~TDBF_SOFT_FIRSTUSE;
  }
  
  /*
   * Caller is responsible for splsoftnet().
   */
  void
  tdb_rehash(void)
  {
 - struct tdb **new_tdbh, **new_tdbaddr, **new_srcaddr, *tdbp, *tdbnp;
 + struct tdb **new_tdbh, **new_tdbdst, **new_srcaddr, *tdbp, *tdbnp;
   u_int i, old_hashmask = tdb_hashmask;
   u_int32_t hashval;
  
   tdb_hashmask = (tdb_hashmask  1) | 1;
  
   new_tdbh = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
   M_WAITOK | M_ZERO);
 - new_tdbaddr = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
 + new_tdbdst = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
   M_WAITOK | M_ZERO);
   new_srcaddr = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
   M_WAITOK | M_ZERO);
  
   for (i = 0; i = old_hashmask; i++) {
   for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbnp) {
   tdbnp = tdbp-tdb_hnext;
   hashval = tdb_hash(tdbp-tdb_rdomain,
   tdbp-tdb_spi, tdbp-tdb_dst,
   tdbp-tdb_sproto);
   tdbp-tdb_hnext = new_tdbh[hashval];
   new_tdbh[hashval] = tdbp;
   }
  
 - for (tdbp = tdbaddr[i]; tdbp != NULL; tdbp = tdbnp) {
 - tdbnp = tdbp-tdb_anext;
 + for (tdbp = tdbdst[i]; tdbp != NULL; tdbp = tdbnp) {
 + tdbnp = tdbp-tdb_dnext;
   hashval = tdb_hash(tdbp-tdb_rdomain,
   0, tdbp-tdb_dst,
   tdbp-tdb_sproto);
 - tdbp-tdb_anext = new_tdbaddr[hashval];
 - new_tdbaddr[hashval] = tdbp;
 + tdbp-tdb_dnext = new_tdbdst[hashval];
 + new_tdbdst[hashval] = tdbp;
   }
  
   for (tdbp = tdbsrc[i]; tdbp != NULL; tdbp = tdbnp) {
   tdbnp = 

m_devget(9), m_clget(9) and ifp

2015-04-08 Thread Martin Pieuchot
Now that all the drivers using m_devget(9) have been converted to
if_input() we no longer need to pass an interface pointer to set
`rcvif'.

This is a small step towards the removal of an interface pointer
in the mbuf header.

While here, m_clget(9) also no longer needs an ifp.

Ok?

Index: arch/sparc/dev/hme.c
===
RCS file: /cvs/src/sys/arch/sparc/dev/hme.c,v
retrieving revision 1.68
diff -u -p -r1.68 hme.c
--- arch/sparc/dev/hme.c8 Apr 2015 10:07:47 -   1.68
+++ arch/sparc/dev/hme.c8 Apr 2015 12:58:56 -
@@ -828,7 +828,7 @@ hme_read(sc, idx, len, flags)
 
/* Pull packet off interface. */
m = m_devget(sc-sc_bufs-rx_buf[idx] + HME_RX_OFFSET, len,
-   HME_RX_OFFSET, sc-sc_arpcom.ac_if);
+   HME_RX_OFFSET);
if (m == NULL) {
ifp-if_ierrors++;
return;
Index: arch/sparc64/dev/vnet.c
===
RCS file: /cvs/src/sys/arch/sparc64/dev/vnet.c,v
retrieving revision 1.43
diff -u -p -r1.43 vnet.c
--- arch/sparc64/dev/vnet.c 2 Apr 2015 09:46:48 -   1.43
+++ arch/sparc64/dev/vnet.c 8 Apr 2015 12:59:02 -
@@ -734,7 +734,7 @@ vnet_rx_vio_desc_data(struct vnet_softc 
}
 
/* Stupid OBP doesn't align properly. */
-m = m_devget(buf, dm-nbytes, ETHER_ALIGN, ifp);
+m = m_devget(buf, dm-nbytes, ETHER_ALIGN);
pool_put(sc-sc_pool, buf);
if (m == NULL) {
ifp-if_ierrors++;
Index: dev/ic/dc.c
===
RCS file: /cvs/src/sys/dev/ic/dc.c,v
retrieving revision 1.139
diff -u -p -r1.139 dc.c
--- dev/ic/dc.c 14 Mar 2015 03:38:47 -  1.139
+++ dev/ic/dc.c 8 Apr 2015 12:59:05 -
@@ -2143,7 +2143,7 @@ dc_rxeof(struct dc_softc *sc)
/* No errors; receive the packet. */
total_len -= ETHER_CRC_LEN;
 
-   m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN, ifp);
+   m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN);
dc_newbuf(sc, i, m);
DC_INC(i, DC_RX_LIST_CNT);
if (m0 == NULL) {
Index: dev/ic/mtd8xx.c
===
RCS file: /cvs/src/sys/dev/ic/mtd8xx.c,v
retrieving revision 1.25
diff -u -p -r1.25 mtd8xx.c
--- dev/ic/mtd8xx.c 8 Apr 2015 12:58:24 -   1.25
+++ dev/ic/mtd8xx.c 8 Apr 2015 12:59:08 -
@@ -924,7 +924,7 @@ mtd_rxeof(struct mtd_softc *sc)
0, sc-mtd_cdata.mtd_rx_chain[i].sd_map-dm_mapsize,
BUS_DMASYNC_POSTREAD);
 
-   m0 = m_devget(mtod(m, char *), total_len,  ETHER_ALIGN, ifp);
+   m0 = m_devget(mtod(m, char *), total_len,  ETHER_ALIGN);
mtd_newbuf(sc, i, m);
i = (i + 1) % MTD_RX_LIST_CNT;
if (m0 == NULL) {
Index: dev/ic/rtl81x9.c
===
RCS file: /cvs/src/sys/dev/ic/rtl81x9.c,v
retrieving revision 1.89
diff -u -p -r1.89 rtl81x9.c
--- dev/ic/rtl81x9.c30 Mar 2015 10:04:11 -  1.89
+++ dev/ic/rtl81x9.c8 Apr 2015 12:59:17 -
@@ -643,7 +643,7 @@ rl_rxeof(struct rl_softc *sc)
wrap = (sc-rl_cdata.rl_rx_buf + RL_RXBUFLEN) - rxbufpos;
 
if (total_len  wrap) {
-   m = m_devget(rxbufpos, wrap, ETHER_ALIGN, ifp);
+   m = m_devget(rxbufpos, wrap, ETHER_ALIGN);
if (m != NULL) {
m_copyback(m, wrap, total_len - wrap,
sc-rl_cdata.rl_rx_buf, M_NOWAIT);
@@ -654,7 +654,7 @@ rl_rxeof(struct rl_softc *sc)
}
cur_rx = (total_len - wrap + ETHER_CRC_LEN);
} else {
-   m = m_devget(rxbufpos, total_len, ETHER_ALIGN, ifp);
+   m = m_devget(rxbufpos, total_len, ETHER_ALIGN);
cur_rx += total_len + 4 + ETHER_CRC_LEN;
}
 
Index: dev/pci/if_ale.c
===
RCS file: /cvs/src/sys/dev/pci/if_ale.c,v
retrieving revision 1.37
diff -u -p -r1.37 if_ale.c
--- dev/pci/if_ale.c24 Mar 2015 10:09:06 -  1.37
+++ dev/pci/if_ale.c8 Apr 2015 12:59:25 -
@@ -1532,7 +1532,7 @@ ale_rxeof(struct ale_softc *sc)
 * on these low-end consumer ethernet controller.
 */
m = m_devget((char *)(rs + 1), length - ETHER_CRC_LEN,
-   ETHER_ALIGN, ifp);
+   ETHER_ALIGN);
if (m == NULL) {
ifp-if_iqdrops++;
ale_rx_update_page(sc, rx_page, length, prod);
Index: dev/pci/if_bce.c

if_input() design and next step

2015-04-01 Thread Martin Pieuchot
One of the interesting things we've done during s2k15 was to redesign
ether_input().  The name of this function is clearly misleading.  Back
in the old days [0] it was simply used to put a packet on a protocol
queue.  Today ether_input() still does that, but before it does a lot
of different hacks to determine if a packet should be fed to a pseudo-
interface like vlan(4), carp(4), bridge(4), etc.

ether_input() is now *the* entry point of our network stack and that's
what we need to change.

The main issues that we analysed about the current plumbing are:

 1) None of the pseudo-drivers is (almost) self-contained, which increases
the risk of regressions due to code complexity.  See the recent vlan
example.

 2) They are all plugged differently and most of the time feeding packets
or copies of them recursively.

 3) Every packet has to go through all the pseudo-driver checks even if
you're not using any of them.

So we came up with an architecture that should allow us to solve these
issues while preserving all the existing features that we all enjoy.

In the end, having self-contained, transparent and non-recursive pseudo-
driver code path to feed packets to the network stack will allow us to
work on one driver at a time to make it MP-safe.  Right now it's just too
much spaghetti on the same plate.

So the idea of if_input() is that an interface (ifp) can have multiple
input packet handlers.  These handlers are all on a list and act as
filters.  You give a mbuf to the first one, if it does not return an
error, you give it to the second one, etc.

Now by default every Ethernet driver has a single handler: ether_input().

So what's next?  Well the diff below introduces a new filter for trunk.
Instead of calling ether_input() which will then call trunk_input() for
every interface part of a trunk(4), a new handler is added in the list.

Since this handler is now executed *before* ether_input() we do not call
m_adj() on the mbuf we're currently filtering.  That's a crucial point!

Apart from that, everything should be straightforward.  Please note that
the diff below depends on the recent if_input() and `rcvif` tweak I
just sent.

I'm happily running with it, please let me know how it goes on your
setup.

[0] https://github.com/jonathangray/csrg/blob/master/sys/net/if_ethersubr.c#L299

diff --git sys/net/if_ethersubr.c sys/net/if_ethersubr.c
index bbc9246..1d964dd 100644
--- sys/net/if_ethersubr.c
+++ sys/net/if_ethersubr.c
@@ -463,9 +463,6 @@ ether_input(struct mbuf *m, void *hdr)
int s, llcfound = 0;
struct llc *l;
struct arpcom *ac;
-#if NTRUNK  0
-   int i = 0;
-#endif
 #if NPPPOE  0
struct ether_header *eh_tmp;
 #endif
@@ -480,21 +477,6 @@ ether_input(struct mbuf *m, void *hdr)
m_adj(m, ETHER_HDR_LEN);
}
 
-#if NTRUNK  0
-   /* Handle input from a trunk port */
-   while (ifp-if_type == IFT_IEEE8023ADLAG) {
-   if (++i  TRUNK_MAX_STACKING) {
-   m_freem(m);
-   return (1);
-   }
-   if (trunk_input(ifp, eh, m) != 0)
-   return (1);
-
-   /* Has been set to the trunk interface */
-   ifp = m-m_pkthdr.rcvif;
-   }
-#endif
-
if ((ifp-if_flags  IFF_UP) == 0) {
m_freem(m);
return (1);
@@ -518,17 +500,9 @@ ether_input(struct mbuf *m, void *hdr)
else
m-m_flags |= M_MCAST;
ifp-if_imcasts++;
-#if NTRUNK  0
-   if (ifp != ifp0)
-   ifp0-if_imcasts++;
-#endif
}
 
ifp-if_ibytes += m-m_pkthdr.len + sizeof(*eh);
-#if NTRUNK  0
-   if (ifp != ifp0)
-   ifp0-if_ibytes += m-m_pkthdr.len + sizeof(*eh);
-#endif
 
etype = ntohs(eh-ether_type);
 
diff --git sys/net/if_trunk.c sys/net/if_trunk.c
index e364648..514f77d 100644
--- sys/net/if_trunk.c
+++ sys/net/if_trunk.c
@@ -94,14 +94,14 @@ int  trunk_rr_detach(struct trunk_softc *);
 voidtrunk_rr_port_destroy(struct trunk_port *);
 int trunk_rr_start(struct trunk_softc *, struct mbuf *);
 int trunk_rr_input(struct trunk_softc *, struct trunk_port *,
-   struct ether_header *, struct mbuf *);
+   struct mbuf *);
 
 /* Active failover */
 int trunk_fail_attach(struct trunk_softc *);
 int trunk_fail_detach(struct trunk_softc *);
 int trunk_fail_start(struct trunk_softc *, struct mbuf *);
 int trunk_fail_input(struct trunk_softc *, struct trunk_port *,
-   struct ether_header *, struct mbuf *);
+   struct mbuf *);
 
 /* Loadbalancing */
 int trunk_lb_attach(struct trunk_softc *);
@@ -110,7 +110,7 @@ int  trunk_lb_port_create(struct trunk_port *);
 voidtrunk_lb_port_destroy(struct trunk_port *);
 int trunk_lb_start(struct trunk_softc *, struct mbuf *);
 int trunk_lb_input(struct trunk_softc *, struct trunk_port *,
-   struct 

Re: add support for crc_enabled Elantech v3 touchpads

2015-04-02 Thread Martin Pieuchot
On 02/04/15(Thu) 18:43, Ulf Brosziewski wrote:
 On 04/02/2015 03:39 AM, Fasse wrote:
 On Wed, 01 Apr 2015 21:23:15 +0200
 Ulf Brosziewskiulf.brosziew...@t-online.de  wrote:
 Yes, without some refactoring there won't be an elegant way.
 pms_sync_elantech_v2 encodes some sync state in the 'flags' field
 (ELANTECH_F_2FINGER_PACKET), but doing the same in the v3/CRC case might
 be ugly.
 
 Admittedly I am biased because I don't want to refactor ~2400 LOC to get
 my touchpad working but I don't think that crc enabled v3 touchpads use
 the debounce packet. I just installed Ubuntu and compiled the 3.19.3
 linux kernel with added printk statements in the elantech_packet_check_v3
 function on my laptop. In the linux kernel documentation [0] for elantech
 touchpads it says about the debounce packet: Note on debounce: In case
 the box has unstable power supply or other electricity issues, or when
 number of finger changes, F/W would send debounce packet to inform
 driver that the hardware is in debounce status.
 I could not reproduce the unstable power supply but when switching the
 number of fingers on the touchpad no debounce packet is issued. Instead
 just the head and tail packets are registered and processed (unlike the
 OpenBSD driver which ignores the tail packet). This leads me to belief
 that v3/crc does not use debounce packets.
 Do you think this is possible/likely? (...)
 
 Why not? You seem to have shown that it is possible, at least for your
 hardware and multiple touches. It might well be that the author(s) of the
 Linux driver just wanted to be on the safe side.

Even if that's true, nothing prevents us from committing this diff first, as
long as it does not introduce a regression, and then working on the possible
refactoring needed to support debounce packets :)



carp(4) and Ethernet header

2015-04-09 Thread Martin Pieuchot
Same problem as with vlan(4).  carp(4) also needs to stop calling
ether_input() directly and that implies having a mbuf with the
correct Ethernet header prepended.

Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.249
diff -u -p -r1.249 ip_carp.c
--- netinet/ip_carp.c   7 Apr 2015 10:46:20 -   1.249
+++ netinet/ip_carp.c   9 Apr 2015 12:08:35 -
@@ -1434,11 +1434,15 @@ carp_our_mcastaddr(struct ifnet *ifp, u_
 int
 carp_input(struct ifnet *ifp0, struct ether_header *eh0, struct mbuf *m)
 {
-   struct ether_header eh;
+   struct ether_header *eh;
struct carp_if *cif = (struct carp_if *)ifp0-if_carp;
struct ifnet *ifp;
 
-   memcpy(eh, eh0, sizeof(eh));
+   M_PREPEND(m, sizeof(*eh), M_DONTWAIT);
+   if (m == NULL)
+   return (-1);
+   eh = mtod(m, struct ether_header *);
+   memmove(eh, eh0, sizeof(*eh));
 
if ((ifp = carp_ourether(cif, eh0-ether_dhost)))
;
@@ -1459,11 +1463,11 @@ carp_input(struct ifnet *ifp0, struct et
m0-m_pkthdr.rcvif = vh-sc_if;
 #if NBPFILTER  0
if (vh-sc_if.if_bpf)
-   bpf_mtap_hdr(vh-sc_if.if_bpf, (char *)eh,
-   ETHER_HDR_LEN, m0, BPF_DIRECTION_IN, NULL);
+   bpf_mtap_ether(vh-sc_if.if_bpf, m,
+   BPF_DIRECTION_IN);
 #endif
vh-sc_if.if_ipackets++;
-   ether_input(m0, eh);
+   ether_input_mbuf(vh-sc_if, m0);
}
return (1);
}
@@ -1475,11 +1479,10 @@ carp_input(struct ifnet *ifp0, struct et
 
 #if NBPFILTER  0
if (ifp-if_bpf)
-   bpf_mtap_hdr(ifp-if_bpf, (char *)eh, ETHER_HDR_LEN, m,
-   BPF_DIRECTION_IN, NULL);
+   bpf_mtap_ether(ifp-if_bpf, m, BPF_DIRECTION_IN);
 #endif
ifp-if_ipackets++;
-   ether_input(m, eh);
+   ether_input_mbuf(ifp, m);
 
return (0);
 }



Run ifp detach hooks before ether_ifdetach()

2015-04-09 Thread Martin Pieuchot
Changes done by pseudo-driver *after* ether_ifattach() must be undone
*before* ether_ifdetach().  Otherwise it is impossible to ensure we're
leaving a stacked pseudo-interface in a correct state.

Since I don't want to modify every single driver calling ether_ifdetach()
I'm using a new function, if_deactivate(), to undo all the pseudo-driver
stuff.
It is safe to call this function multiple times because after the first
time the parent interface won't have any pseudo-interface attached to it.

I'm leaving splnet() below, even if pseudo-interfaces that need it should
take care of raising it, to be on the safe side.  More cleanup might
happen later.

This is a trivial change needed for upcoming if_input() work.

Ok?

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.327
diff -u -p -r1.327 if.c
--- net/if.c7 Apr 2015 10:46:20 -   1.327
+++ net/if.c9 Apr 2015 09:15:37 -
@@ -521,22 +521,12 @@ nettxintr(void)
splx(s);
 }
 
-/*
- * Detach an interface from everything in the kernel.  Also deallocate
- * private resources.
- */
 void
-if_detach(struct ifnet *ifp)
+if_deactivate(struct ifnet *ifp)
 {
-   struct ifaddr *ifa;
-   struct ifg_list *ifg;
-   int s = splnet();
-   struct domain *dp;
+   int s;
 
-   ifp-if_flags = ~IFF_OACTIVE;
-   ifp-if_start = if_detached_start;
-   ifp-if_ioctl = if_detached_ioctl;
-   ifp-if_watchdog = NULL;
+   s = splnet();
 
/*
 * Call detach hooks from head to tail.  To make sure detach
@@ -545,12 +535,6 @@ if_detach(struct ifnet *ifp)
 */
dohooks(ifp-if_detachhooks, HOOK_REMOVE | HOOK_FREE);
 
-   /* Remove the watchdog timeout */
-   timeout_del(ifp-if_slowtimo);
-
-   /* Remove the link state task */
-   task_del(systq, ifp-if_linkstatetask);
-
 #if NBRIDGE  0
/* Remove the interface from any bridge it is part of.  */
if (ifp-if_bridgeport)
@@ -562,6 +546,36 @@ if_detach(struct ifnet *ifp)
if (ifp-if_carp  ifp-if_type != IFT_CARP)
carp_ifdetach(ifp);
 #endif
+
+   splx(s);
+}
+
+/*
+ * Detach an interface from everything in the kernel.  Also deallocate
+ * private resources.
+ */
+void
+if_detach(struct ifnet *ifp)
+{
+   struct ifaddr *ifa;
+   struct ifg_list *ifg;
+   struct domain *dp;
+   int s;
+
+   /* Undo pseudo-driver changes. */
+   if_deactivate(ifp);
+
+   s = splnet();
+   ifp-if_flags = ~IFF_OACTIVE;
+   ifp-if_start = if_detached_start;
+   ifp-if_ioctl = if_detached_ioctl;
+   ifp-if_watchdog = NULL;
+
+   /* Remove the watchdog timeout */
+   timeout_del(ifp-if_slowtimo);
+
+   /* Remove the link state task */
+   task_del(systq, ifp-if_linkstatetask);
 
 #if NBPFILTER  0
bpfdetach(ifp);
Index: net/if.h
===
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.161
diff -u -p -r1.161 if.h
--- net/if.h18 Mar 2015 12:23:15 -  1.161
+++ net/if.h9 Apr 2015 09:15:37 -
@@ -446,6 +446,7 @@ voidif_attach(struct ifnet *);
 void   if_attachdomain(void);
 void   if_attachtail(struct ifnet *);
 void   if_attachhead(struct ifnet *);
+void   if_deactivate(struct ifnet *);
 void   if_detach(struct ifnet *);
 void   if_down(struct ifnet *);
 void   if_downall(void);
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.191
diff -u -p -r1.191 if_ethersubr.c
--- net/if_ethersubr.c  7 Apr 2015 10:46:20 -   1.191
+++ net/if_ethersubr.c  9 Apr 2015 09:16:45 -
@@ -802,6 +802,9 @@ ether_ifdetach(struct ifnet *ifp)
struct ifih *ether_ifih;
struct ether_multi *enm;
 
+   /* Undo pseudo-driver changes. */
+   if_deactivate(ifp);
+
ether_ifih = SLIST_FIRST(ifp-if_inputs);
SLIST_REMOVE_HEAD(ifp-if_inputs, ifih_next);
 



bridge(4) and Ethernet header

2015-04-09 Thread Martin Pieuchot
Like vlan(4) and carp(4), bridge(4) also needs to be tweaked to still
work during the if_input() transition.  Removing the ether_input()
calls in this driver was a bit tricky *because* bridge_input() is
called after ether_input().

See how 3 M_PREPEND() are converted to 1?  And soon none will be
required.

Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.233
diff -u -p -r1.233 if_bridge.c
--- net/if_bridge.c 7 Apr 2015 10:46:20 -   1.233
+++ net/if_bridge.c 9 Apr 2015 13:56:07 -
@@ -115,8 +115,7 @@ voidbridge_broadcast(struct bridge_soft
 struct ether_header *, struct mbuf *);
 void   bridge_localbroadcast(struct bridge_softc *, struct ifnet *,
 struct ether_header *, struct mbuf *);
-void   bridge_span(struct bridge_softc *, struct ether_header *,
-struct mbuf *);
+void   bridge_span(struct bridge_softc *, struct mbuf *);
 void   bridge_stop(struct bridge_softc *);
 void   bridge_init(struct bridge_softc *);
 intbridge_bifconf(struct bridge_softc *, struct ifbifconf *);
@@ -1004,7 +1003,7 @@ bridge_output(struct ifnet *ifp, struct 
return (0);
}
 #endif /* IPSEC */
-   bridge_span(sc, NULL, m);
+   bridge_span(sc, m);
 
TAILQ_FOREACH(p, sc-sc_iflist, next) {
dst_if = p-ifp;
@@ -1080,7 +1079,7 @@ sendunicast:
(sa = bridge_tunneltag(m, dst_p-brt_tunnel.sa.sa_family)) != NULL)
memcpy(sa, dst_p-brt_tunnel.sa, dst_p-brt_tunnel.sa.sa_len);
 
-   bridge_span(sc, NULL, m);
+   bridge_span(sc, m);
if ((dst_if-if_flags  IFF_RUNNING) == 0) {
m_freem(m);
return (ENETDOWN);
@@ -1296,9 +1295,10 @@ bridgeintr_frame(struct bridge_softc *sc
  * not for us, and schedule an interrupt.
  */
 struct mbuf *
-bridge_input(struct ifnet *ifp, struct ether_header *eh, struct mbuf *m)
+bridge_input(struct ifnet *ifp, struct ether_header *eh0, struct mbuf *m)
 {
struct bridge_softc *sc;
+   struct ether_header *eh;
int s;
struct bridge_iflist *ifl, *srcifl;
struct arpcom *ac;
@@ -1320,13 +1320,18 @@ bridge_input(struct ifnet *ifp, struct e
if ((sc-sc_if.if_flags  IFF_RUNNING) == 0)
return (m);
 
+   M_PREPEND(m, sizeof(*eh), M_DONTWAIT);
+   if (m == NULL)
+   return (NULL);
+   eh = mtod(m, struct ether_header *);
+   memmove(eh, eh0, sizeof(*eh));
+
 #if NBPFILTER  0
if (sc-sc_if.if_bpf)
-   bpf_mtap_hdr(sc-sc_if.if_bpf, (caddr_t)eh,
-   ETHER_HDR_LEN, m, BPF_DIRECTION_IN, NULL);
+   bpf_mtap_ether(sc-sc_if.if_bpf, m, BPF_DIRECTION_IN);
 #endif
 
-   bridge_span(sc, eh, m);
+   bridge_span(sc, m);
 
if (m-m_flags  (M_BCAST | M_MCAST)) {
/*
@@ -1356,18 +1361,9 @@ bridge_input(struct ifnet *ifp, struct e
(ifl-bif_state == BSTP_IFSTATE_DISCARDING))
return (m);
 
-   /*
-* make a copy of 'm' with 'eh' tacked on to the
-* beginning.  Return 'm' for local processing
-* and enqueue the copy.  Schedule netisr.
-*/
mc = m_copym2(m, 0, M_COPYALL, M_NOWAIT);
if (mc == NULL)
return (m);
-   M_PREPEND(mc, ETHER_HDR_LEN, M_DONTWAIT);
-   if (mc == NULL)
-   return (m);
-   bcopy(eh, mtod(mc, caddr_t), ETHER_HDR_LEN);
s = splnet();
if (IF_QFULL(sc-sc_if.if_snd)) {
m_freem(mc);
@@ -1387,11 +1383,11 @@ bridge_input(struct ifnet *ifp, struct e
m-m_pkthdr.ph_rtableid = ifl-ifp-if_rdomain;
 #if NBPFILTER  0
if (ifl-ifp-if_bpf)
-   bpf_mtap(ifl-ifp-if_bpf, m,
+   bpf_mtap_ether(ifl-ifp-if_bpf, m,
BPF_DIRECTION_IN);
 #endif
m-m_flags |= M_PROTO1;
-   ether_input(m, eh);
+   ether_input_mbuf(ifl-ifp, m);
ifl-ifp-if_ipackets++;
m = NULL;
}
@@ -1434,21 +1430,21 @@ bridge_input(struct ifnet *ifp, struct e
 * is aware */
 #if NBPFILTER  0
if (ifl-ifp-if_bpf)
-   bpf_mtap_hdr(ifl-ifp-if_bpf, (caddr_t)eh,
-   ETHER_HDR_LEN, m, BPF_DIRECTION_IN, NULL);
+   bpf_mtap_ether(ifl-ifp-if_bpf, m,
+   BPF_DIRECTION_IN);
 #endif
/* Count for the interface we are 

vlan(4) and Ethernet header

2015-04-09 Thread Martin Pieuchot
Right now vlan_input() is called *after* ether_input().  More precisely
it is called after the mbuf has been m_adj(9)'usted to skip the Ethernet
header.  This is not a problem per se but...

To make sure vlan(4) keeps working during the if_input() transition, it
has to stop calling ether_input() and use if_input() instead.  Since
this new API will run input handlers *before* ether_input(), the packet
*must* contain the original Ethernet header.  Hence the small hack
below.  This is just temporary; this code will be deleted as soon as
vlan(4) is converted to if_input().

Index: net/if_vlan.c
===
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.114
diff -u -p -r1.114 if_vlan.c
--- net/if_vlan.c   7 Apr 2015 10:46:20 -   1.114
+++ net/if_vlan.c   9 Apr 2015 12:12:35 -
@@ -277,6 +277,7 @@ vlan_input(struct ether_header *eh, stru
struct vlan_taghash *tagh;
u_inttag;
u_int16_tetype;
+   struct ether_header *eh1;
 
if (m-m_flags  M_VLANTAG) {
etype = ETHERTYPE_VLAN;
@@ -351,8 +352,14 @@ vlan_input(struct ether_header *eh, stru
}
}
 
+   M_PREPEND(m, sizeof(*eh1), M_DONTWAIT);
+   if (m == NULL)
+   return (-1);
+   eh1 = mtod(m, struct ether_header *);
+   memmove(eh1, eh, sizeof(*eh1));
+
ifv-ifv_if.if_ipackets++;
-   ether_input(m, eh);
+   ether_input_mbuf(ifv-ifv_if, m);
 
return (0);
 }



Re: Brainy: Memory Leak in ICMP

2015-05-19 Thread Martin Pieuchot
On 19/05/15(Tue) 15:28, Maxime Villard wrote:
 -- netinet/ip_icmp.c --
 
 925   rt = rtalloc(sintosa(sin), RT_REPORT|RT_RESOLVE, rtableid);
   if (rt == NULL)
   return (NULL);
 
   /* Check if the route is actually usable */
   if (rt-rt_flags  (RTF_REJECT | RTF_BLACKHOLE) ||
   (rt-rt_flags  RTF_UP) == 0)
   return (NULL);
 
 ---
 
 'rt' is not released.
 
 Found by The Brainy Code Scanner.

Indeed!  Thanks for the report, I just committed a fix.

Martin



Re: ospfd announces carp interface with physical link down

2015-05-20 Thread Martin Pieuchot
On 20/05/15(Wed) 07:40, Henning Brauer wrote:
 * Johan Ymerson johan.ymer...@transmode.com [2015-05-19 19:25]:
  On Tue, 2015-05-19 at 11:16 +, Johan Ymerson wrote:
   On Tue, 2015-05-19 at 11:24 +0100, Stuart Henderson wrote:
On 2015/05/19 10:10, Johan Ymerson wrote:
Yes I understand that, but if carp init was counted in LINK_STATE_DOWN
then the metric would be 65535 which I think would still avoid the
problem you're seeing, and would involve less special-casing in ospfd.
   Yes, that would also resolve the issue, but it is a bit illogical to
   announce a network we cannot possibly route traffic to (due to hardware
   problems).
  After some more testing I think we can conclude that this is most
  definitely a kernel issue.
 
 hmm. there's definitely more to it.

Indeed
 
 just for completeness: LINK_STATE_INVALID is 1, and that's what
 carp_set_state uses for everything but master and backup. so far so
 good.
 
 ifp is part of the sc which in turn is malloc'd with M_ZERO in
 carp_clone_create, so link state will be 0 aka LINK_STATE_UNKNOWN.
 
 however, at the end of carp_clone_create, we call
 carp_set_state_all(sc, INIT) which should take care of that.

Sadly it does not, because of:

2305:   if (vhe-state == state)
2306:   return;

I'd extend your diff a little bit to make this vhe-state transition
less confusing, see below.  Do you confirm this also fixes your issue?

Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.256
diff -u -p -r1.256 ip_carp.c
--- netinet/ip_carp.c   15 May 2015 11:53:06 -  1.256
+++ netinet/ip_carp.c   20 May 2015 10:48:17 -
@@ -708,9 +708,7 @@ carpattach(int n)
 }
 
 int
-carp_clone_create(ifc, unit)
-   struct if_clone *ifc;
-   int unit;
+carp_clone_create(struct if_clone *ifc, int unit)
 {
struct carp_softc *sc;
struct ifnet *ifp;
@@ -753,11 +751,11 @@ carp_clone_create(ifc, unit)
ifp-if_sadl-sdl_type = IFT_CARP;
ifp-if_output = carp_output;
ifp-if_priority = IF_CARP_DEFAULT_PRIORITY;
+   ifp-if_link_state = LINK_STATE_INVALID;
 
/* Hook carp_addr_updated to cope with address and route changes. */
sc-ah_cookie = hook_establish(sc-sc_if.if_addrhooks, 0,
carp_addr_updated, sc);
-   carp_set_state_all(sc, INIT);
 
return (0);
 }
@@ -774,6 +772,7 @@ carp_new_vhost(struct carp_softc *sc, in
vhe-parent_sc = sc;
vhe-vhid = vhid;
vhe-advskew = advskew;
+   vhe-state = INIT;
timeout_set(vhe-ad_tmo, carp_send_ad, vhe);
timeout_set(vhe-md_tmo, carp_master_down, vhe);
timeout_set(vhe-md6_tmo, carp_master_down, vhe);
@@ -2276,8 +2275,12 @@ carp_set_state_all(struct carp_softc *sc
 {
struct carp_vhost_entry *vhe;
 
-   LIST_FOREACH(vhe, sc-carp_vhosts, vhost_entries)
+   LIST_FOREACH(vhe, sc-carp_vhosts, vhost_entries) {
+   if (vhe-state == state)
+   continue;
+
carp_set_state(vhe, state);
+   }
 }
 
 void
@@ -2287,8 +2290,8 @@ carp_set_state(struct carp_vhost_entry *
static const char *carp_states[] = { CARP_STATES };
int loglevel;
 
-   if (vhe-state == state)
-   return;
+   KASSERT(vhe-state != state);
+
if (vhe-state == INIT || state == INIT)
loglevel = LOG_WARNING;
else



Re: ospfd announces carp interface with physical link down

2015-05-21 Thread Martin Pieuchot
On 20/05/15(Wed) 14:14, Johan Ymerson wrote:
 [...]
 The patch did not apply cleanly to OPENBSD_5_7, I rewrote the patch a
 bit:

Thanks, I committed my diff to -current.

 With this patch everything (almost) works. At least as well as my patch
 did. OSPFd still does something wrong with the link state of carp
 interfaces when starting. Have a look at this:
 
 fw2:/usr/src/sys # ospfctl show int
 Interface   AddressState  HelloTimer Linkstate  Uptimenc  ac
 carp7   195.58.98.145/28   DOWN   -  backup 00:00:00   0   0
 carp5   192.168.253.1/24   DOWN   -  backup 00:00:00   0   0
 carp3   192.168.202.1/24   DOWN   -  backup 00:00:00   0   0
 carp2   192.168.254.1/23   DOWN   -  invalid00:00:00   0   0
 carp1   31.15.61.129/26DOWN   -  invalid00:00:00   0   0
 carp0   92.33.0.202/30 DOWN   -  backup 00:00:00   0   0
 bnx0192.168.200.5/24   OTHER  00:00:00   active 00:13:13   4   2
 
 carp2 is (correctly) invalid, because the cable is unplugged.
 carp1 is _not_ invalid.  If I restart ospfd after the system has come up it
 looks better:
 carp1   31.15.61.129/26DOWN   -  backup 00:00:00   0   0
 
 This happens with random interfaces at start-up.
 I believe this may be the cause:
 in usr.sbin/ospfd/interface.c, if_act_start():
 
 if (!((iface-flags  IFF_UP) 
 LINK_STATE_IS_UP(iface-linkstate)))
 return (0);
 
 This check lacks the exception for carp interfaces found in ospfe.c. If
 the interface already has been initialized when ospfd starts, it will
 not pick that interface up as a carp interface.

I don't know much about ospfd but if changing this check solves your
issues, feel free to send a diff.  I'd suggest creating a new thread
with an obvious name :)



Re: RTF_LOCAL and permanent ARP

2015-06-06 Thread Martin Pieuchot
On 05/06/15(Fri) 15:04, Claudio Jeker wrote:
 On Thu, Jun 04, 2015 at 12:19:10PM +0200, Martin Pieuchot wrote:
  I'd like to put the link-layer address back into the gateway field of
  RTF_LOCAL addresses.  The problem is that RTF_LOCAL routes are also
  marked as RTF_LLINFO and a lot of code assume (correctly) that such
  routes contain valid ARP or ND information.
  
  I believe we decided to use an ``empty'' lladdr because previously all
  the routes created via rt_ifa_add(9) were using the same code and we
  needed the exact same gateway to remove MPATH routes.  But now that
  only RTF_LOCAL routes use this code and taking into consideration that
  such route *cannot* be MPATH, we can simply use ifp-if_sadl instead
  of a blank sockaddr_dl.
  
  This should also fix the (incomplete) output in arp(8) and ndp(8).
  
  Ok?
 
 If I see this correctly rt_ifa_del is now doing deletes without a gateway
 specified when the RTF_LLINFO flag is set. Are you sure that there will
 never be multipath routes in that case?

Yes I'm sure because in this case RTF_LLINFO implies RTF_LOCAL.  And we
only create one RTF_LOCAL route per IP address.

I should probably add some KASSERT()s to auto-document that.

 Also do we need to set RTAX_LABEL on remove? I think that is not needed.
 This is unrelated and should be handled independently.

I don't think so.

 Apart from that OK claudio@
 
  
  Index: net/route.c
  ===
  RCS file: /cvs/src/sys/net/route.c,v
  retrieving revision 1.212
  diff -u -p -r1.212 route.c
  --- net/route.c 26 May 2015 12:19:51 -  1.212
  +++ net/route.c 4 Jun 2015 10:03:51 -
  @@ -1121,27 +1121,23 @@ rt_maskedcopy(struct sockaddr *src, stru
   int
   rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst)
   {
  +   struct ifnet*ifp = ifa-ifa_ifp;
  struct rtentry  *rt, *nrt = NULL;
  struct sockaddr_rtlabel  sa_rl;
  -   struct sockaddr_dl   sa_dl = { sizeof(sa_dl), AF_LINK };
  struct rt_addrinfo   info;
  -   u_short  rtableid = ifa-ifa_ifp-if_rdomain;
  -   u_int8_t prio = ifa-ifa_ifp-if_priority + RTP_STATIC;
  +   u_short  rtableid = ifp-if_rdomain;
  +   u_int8_t prio = ifp-if_priority + RTP_STATIC;
  int  error;
   
  -   sa_dl.sdl_type = ifa-ifa_ifp-if_type;
  -   sa_dl.sdl_index = ifa-ifa_ifp-if_index;
  -
  memset(info, 0, sizeof(info));
  info.rti_ifa = ifa;
  info.rti_flags = flags | RTF_MPATH;
  info.rti_info[RTAX_DST] = dst;
  if (flags  RTF_LLINFO)
  -   info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sa_dl;
  +   info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)ifp-if_sadl;
  else
  info.rti_info[RTAX_GATEWAY] = ifa-ifa_addr;
  -   info.rti_info[RTAX_LABEL] =
  -   rtlabel_id2sa(ifa-ifa_ifp-if_rtlabelid, sa_rl);
  +   info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp-if_rtlabelid, sa_rl);
   
   #ifdef MPLS
  if ((flags  RTF_MPLS) == RTF_MPLS) {
  @@ -1189,14 +1185,14 @@ rt_ifa_add(struct ifaddr *ifa, int flags
   int
   rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst)
   {
  +   struct ifnet*ifp = ifa-ifa_ifp;
  struct rtentry  *rt, *nrt = NULL;
  struct mbuf *m = NULL;
  struct sockaddr *deldst;
  struct rt_addrinfo   info;
  struct sockaddr_rtlabel  sa_rl;
  -   struct sockaddr_dl   sa_dl = { sizeof(sa_dl), AF_LINK };
  -   u_short  rtableid = ifa-ifa_ifp-if_rdomain;
  -   u_int8_t prio = ifa-ifa_ifp-if_priority + RTP_STATIC;
  +   u_short  rtableid = ifp-if_rdomain;
  +   u_int8_t prio = ifp-if_priority + RTP_STATIC;
  int  error;
   
   #ifdef MPLS
  @@ -1227,19 +1223,13 @@ rt_ifa_del(struct ifaddr *ifa, int flags
  }
  }
   
  -   sa_dl.sdl_type = ifa-ifa_ifp-if_type;
  -   sa_dl.sdl_index = ifa-ifa_ifp-if_index;
  -
  memset(info, 0, sizeof(info));
  info.rti_ifa = ifa;
  info.rti_flags = flags;
  info.rti_info[RTAX_DST] = dst;
  -   if (flags  RTF_LLINFO)
  -   info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sa_dl;
  -   else
  +   if ((flags  RTF_LLINFO) == 0)
  info.rti_info[RTAX_GATEWAY] = ifa-ifa_addr;
  -   info.rti_info[RTAX_LABEL] =
  -   rtlabel_id2sa(ifa-ifa_ifp-if_rtlabelid, sa_rl);
  +   info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp-if_rtlabelid, sa_rl);
   
  if ((flags  RTF_HOST) == 0)
  info.rti_info[RTAX_NETMASK] = ifa-ifa_netmask;
  Index: netinet6/nd6.c
  ===
  RCS file: /cvs/src/sys/netinet6/nd6.c,v
  retrieving revision 1.136
  diff -u -p -r1.136 nd6.c
  --- netinet6/nd6.c  15 May 2015 12:00:57 -  1.136
  +++ netinet6/nd6.c  4 Jun 2015 09:51:54 -
  @@ -651,7 +651,6

Unneeded splnet()

2015-06-08 Thread Martin Pieuchot
bridge_ifenqueue() does not need any spl protection, if_output()
already raises it.

ok?

Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.241
diff -u -p -r1.241 if_bridge.c
--- net/if_bridge.c 8 Jun 2015 13:44:08 -   1.241
+++ net/if_bridge.c 8 Jun 2015 13:48:29 -
@@ -967,7 +967,7 @@ bridge_output(struct ifnet *ifp, struct 
struct bridge_rtnode *dst_p = NULL;
struct ether_addr *dst;
struct bridge_softc *sc;
-   int s, error, len;
+   int error, len;
 
/* ifp must be a member interface of the bridge. */ 
if (ifp-if_bridgeport == NULL) {
@@ -1072,9 +1072,7 @@ bridge_output(struct ifnet *ifp, struct 
mc = m1;
}
 
-   s = splnet();
error = bridge_ifenqueue(sc, dst_if, mc);
-   splx(s);
if (error)
continue;
}
@@ -1093,9 +1091,7 @@ sendunicast:
m_freem(m);
return (ENETDOWN);
}
-   s = splnet();
bridge_ifenqueue(sc, dst_if, m);
-   splx(s);
return (0);
 }
 
@@ -1135,12 +1131,12 @@ bridgeintr(void)
 void
 bridgeintr_frame(struct bridge_softc *sc, struct mbuf *m)
 {
-   int s, len;
struct ifnet *src_if, *dst_if;
struct bridge_iflist *ifl;
struct bridge_rtnode *dst_p;
struct ether_addr *dst, *src;
struct ether_header eh;
+   int len;
 
if ((sc-sc_if.if_flags  IFF_RUNNING) == 0) {
m_freem(m);
@@ -1293,9 +1289,7 @@ bridgeintr_frame(struct bridge_softc *sc
if ((len - ETHER_HDR_LEN)  dst_if-if_mtu)
bridge_fragment(sc, dst_if, eh, m);
else {
-   s = splnet();
bridge_ifenqueue(sc, dst_if, m);
-   splx(s);
}
 }
 
@@ -1499,7 +1493,7 @@ bridge_broadcast(struct bridge_softc *sc
struct bridge_iflist *p;
struct mbuf *mc;
struct ifnet *dst_if;
-   int len, s, used = 0;
+   int len, used = 0;
 
TAILQ_FOREACH(p, sc-sc_iflist, next) {
/*
@@ -1585,9 +1579,7 @@ bridge_broadcast(struct bridge_softc *sc
if ((len - ETHER_HDR_LEN)  dst_if-if_mtu)
bridge_fragment(sc, dst_if, eh, mc);
else {
-   s = splnet();
bridge_ifenqueue(sc, dst_if, mc);
-   splx(s);
}
}
 
@@ -1638,7 +1630,7 @@ bridge_span(struct bridge_softc *sc, str
struct bridge_iflist *p;
struct ifnet *ifp;
struct mbuf *mc, *m;
-   int s, error;
+   int error;
 
if (TAILQ_EMPTY(sc-sc_spanlist))
return;
@@ -1665,9 +1657,7 @@ bridge_span(struct bridge_softc *sc, str
continue;
}
 
-   s = splnet();
error = bridge_ifenqueue(sc, ifp, mc);
-   splx(s);
if (error)
continue;
}
@@ -2555,7 +2545,7 @@ bridge_fragment(struct bridge_softc *sc,
 {
struct llc llc;
struct mbuf *m0;
-   int s, error = 0;
+   int error = 0;
int hassnap = 0;
u_int16_t etype;
struct ip *ip;
@@ -2570,9 +2560,7 @@ bridge_fragment(struct bridge_softc *sc,
len += ETHER_VLAN_ENCAP_LEN;
if ((ifp-if_capabilities  IFCAP_VLAN_MTU) 
(len - sizeof(struct ether_vlan_header) = ifp-if_mtu)) {
-   s = splnet();
bridge_ifenqueue(sc, ifp, m);
-   splx(s);
return;
}
goto dropit;
@@ -2640,13 +2628,10 @@ bridge_fragment(struct bridge_softc *sc,
continue;
}
bcopy(eh, mtod(m, caddr_t), sizeof(*eh));
-   s = splnet();
error = bridge_ifenqueue(sc, ifp, m);
if (error) {
-   splx(s);
continue;
}
-   splx(s);
} else
m_freem(m);
}



bridge_output() without m_buf_tag

2015-06-08 Thread Martin Pieuchot
Diff below moves bridge_output() to if_output().  It fixes the case I
already described some weeks ago where you have a physical interface
in a bridge and a vlan on top of it which is not in the bridge.

It also changes the loop prevention code to use M_PROTO1 like in the
input path.

Tests, comments and oks welcome.

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.338
diff -u -p -r1.338 if.c
--- net/if.c7 Jun 2015 12:02:28 -   1.338
+++ net/if.c8 Jun 2015 13:46:19 -
@@ -449,6 +449,19 @@ if_output(struct ifnet *ifp, struct mbuf
int s, length, error = 0;
unsigned short mflags;
 
+#ifdef DIAGNOSTIC
+   if (ifp-if_rdomain != rtable_l2(m-m_pkthdr.ph_rtableid)) {
+   printf(%s: trying to send packet on wrong domain. 
+   if %d vs. mbuf %d\n, ifp-if_xname, ifp-if_rdomain,
+   rtable_l2(m-m_pkthdr.ph_rtableid));
+   }
+#endif
+
+#if NBRIDGE  0
+   if (ifp-if_bridgeport  (m-m_flags  M_PROTO1) == 0)
+   return (bridge_output(ifp, m, NULL, NULL));
+#endif
+
length = m-m_pkthdr.len;
mflags = m-m_flags;
 
Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.241
diff -u -p -r1.241 if_bridge.c
--- net/if_bridge.c 8 Jun 2015 13:44:08 -   1.241
+++ net/if_bridge.c 8 Jun 2015 13:46:19 -
@@ -2665,10 +2665,12 @@ bridge_ifenqueue(struct bridge_softc *sc
 {
int error, len;
 
+   /* Loop prevention. */
+   m-m_flags |= M_PROTO1;
+
 #if NGIF  0
/* Packet needs etherip encapsulation. */
if (ifp-if_type == IFT_GIF) {
-   m-m_flags |= M_PROTO1;
 
/* Count packets input into the gif from outside */
ifp-if_ipackets++;
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.204
diff -u -p -r1.204 if_ethersubr.c
--- net/if_ethersubr.c  8 Jun 2015 13:44:08 -   1.204
+++ net/if_ethersubr.c  8 Jun 2015 13:46:20 -
@@ -181,15 +181,6 @@ ether_output(struct ifnet *ifp, struct m
struct arpcom *ac = (struct arpcom *)ifp;
int error = 0;
 
-#ifdef DIAGNOSTIC
-   if (ifp-if_rdomain != rtable_l2(m-m_pkthdr.ph_rtableid)) {
-   printf(%s: trying to send packet on wrong domain. 
-   if %d vs. mbuf %d, AF %d\n, ifp-if_xname,
-   ifp-if_rdomain, rtable_l2(m-m_pkthdr.ph_rtableid),
-   dst-sa_family);
-   }
-#endif
-
esrc = ac-ac_enaddr;
 
if ((ifp-if_flags  (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
@@ -276,47 +267,6 @@ ether_output(struct ifnet *ifp, struct m
eh-ether_type = etype;
memcpy(eh-ether_dhost, edst, sizeof(eh-ether_dhost));
memcpy(eh-ether_shost, esrc, sizeof(eh-ether_shost));
-
-#if NBRIDGE  0
-   /*
-* Interfaces that are bridgeports need special handling for output.
-*/
-   if (ifp-if_bridgeport) {
-   struct m_tag *mtag;
-
-   /*
-* Check if this packet has already been sent out through
-* this bridgeport, in which case we simply send it out
-* without further bridge processing.
-*/
-   for (mtag = m_tag_find(m, PACKET_TAG_BRIDGE, NULL); mtag;
-   mtag = m_tag_find(m, PACKET_TAG_BRIDGE, mtag)) {
-#ifdef DEBUG
-   /* Check that the information is there */
-   if (mtag-m_tag_len != sizeof(caddr_t)) {
-   error = EINVAL;
-   goto bad;
-   }
-#endif
-   if (!memcmp(ifp-if_bridgeport, mtag + 1,
-   sizeof(caddr_t)))
-   break;
-   }
-   if (mtag == NULL) {
-   /* Attach a tag so we can detect loops */
-   mtag = m_tag_get(PACKET_TAG_BRIDGE, sizeof(caddr_t),
-   M_NOWAIT);
-   if (mtag == NULL) {
-   error = ENOBUFS;
-   goto bad;
-   }
-   memcpy(mtag + 1, ifp-if_bridgeport, sizeof(caddr_t));
-   m_tag_prepend(m, mtag);
-   error = bridge_output(ifp, m, NULL, NULL);
-   return (error);
-   }
-   }
-#endif
 
return (if_output(ifp, m));
 bad:
Index: sys/mbuf.h
===
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.191
diff -u -p -r1.191 mbuf.h
--- sys/mbuf.h  23 May 2015 12:52:59 -  1.191
+++ sys/mbuf.h  8 Jun 2015 13:46:20 

Convert bridge(4) to if_input()

2015-06-08 Thread Martin Pieuchot
This is the last pseudo-driver conversion.

The idea is to run bridge_input() *before* any ifih on an interface.
Doing so allows us to remove the hack between vlan(4) and bridge(4)
and simplify the logic for stacked ifih.

With that net/if_ethersubr.c is now free from #ifdef NPSEUDODRIVER.

As usual, tests, comments and oks welcome.

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.338
diff -u -p -r1.338 if.c
--- net/if.c7 Jun 2015 12:02:28 -   1.338
+++ net/if.c8 Jun 2015 14:07:56 -
@@ -529,6 +529,16 @@ again:
 * interface until it is consumed.
 */
ifp = m-m_pkthdr.rcvif;
+
+#if NBRIDGE  0
+   if (ifp-if_bridgeport  (m-m_flags  M_PROTO1) == 0) {
+   m = bridge_input(m);
+   if (m == NULL)
+   continue;
+   }
+   m-m_flags = ~M_PROTO1;/* Loop prevention */
+#endif
+
SLIST_FOREACH(ifih, ifp-if_inputs, ifih_next) {
if ((*ifih-ifih_input)(m))
break;
Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.241
diff -u -p -r1.241 if_bridge.c
--- net/if_bridge.c 8 Jun 2015 13:44:08 -   1.241
+++ net/if_bridge.c 8 Jun 2015 14:07:57 -
@@ -116,8 +116,6 @@ voidbridge_broadcast(struct bridge_soft
 void   bridge_localbroadcast(struct bridge_softc *, struct ifnet *,
 struct ether_header *, struct mbuf *);
 void   bridge_span(struct bridge_softc *, struct mbuf *);
-struct mbuf *bridge_dispatch(struct bridge_iflist *, struct ifnet *,
-struct mbuf *);
 void   bridge_stop(struct bridge_softc *);
 void   bridge_init(struct bridge_softc *);
 intbridge_bifconf(struct bridge_softc *, struct ifbifconf *);
@@ -1198,7 +1196,7 @@ bridgeintr_frame(struct bridge_softc *sc
 * If packet is unicast, destined for someone on this
 * side of the bridge, drop it.
 */
-   if ((m-m_flags  (M_BCAST | M_MCAST)) == 0) {
+   if (!ETHER_IS_MULTICAST(eh.ether_dhost)) {
if ((dst_p = bridge_rtlookup(sc, dst)) != NULL)
dst_if = dst_p-brt_if;
else
@@ -1207,8 +1205,14 @@ bridgeintr_frame(struct bridge_softc *sc
m_freem(m);
return;
}
-   } else
+   } else {
+   if (memcmp(etherbroadcastaddr, eh.ether_dhost,
+   sizeof(etherbroadcastaddr)) == 0)
+   m-m_flags |= M_BCAST;
+   else
+   m-m_flags |= M_MCAST;
dst_if = NULL;
+   }
 
/*
 * Multicast packets get handled a little differently:
@@ -1304,37 +1308,33 @@ bridgeintr_frame(struct bridge_softc *sc
  * not for us, and schedule an interrupt.
  */
 struct mbuf *
-bridge_input(struct ifnet *ifp, struct ether_header *eh0, struct mbuf *m)
+bridge_input(struct mbuf *m)
 {
+   struct ifnet *ifp;
struct bridge_softc *sc;
struct bridge_iflist *ifl;
+   struct bridge_iflist *srcifl;
struct ether_header *eh;
-#if NVLAN  0
-   uint16_t etype = ntohs(eh0-ether_type);
-#endif /* NVLAN  0 */
+   struct arpcom *ac;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+   struct mbuf *mc;
+   int s;
 
/*
 * Make sure this interface is a bridge member.
 */
-   if (ifp == NULL || ifp-if_bridgeport == NULL || m == NULL)
+   ifp = m-m_pkthdr.rcvif;
+   if (ifp == NULL || ifp-if_bridgeport == NULL)
return (m);
 
if ((m-m_flags  M_PKTHDR) == 0)
panic(bridge_input(): no HDR);
 
-   m-m_flags = ~M_PROTO1;/* Loop prevention */
-
ifl = (struct bridge_iflist *)ifp-if_bridgeport;
sc = ifl-bridge_sc;
if ((sc-sc_if.if_flags  IFF_RUNNING) == 0)
return (m);
 
-   M_PREPEND(m, sizeof(*eh), M_DONTWAIT);
-   if (m == NULL)
-   return (NULL);
-   eh = mtod(m, struct ether_header *);
-   memmove(eh, eh0, sizeof(*eh));
-
 #if NBPFILTER  0
if (sc-sc_if.if_bpf)
bpf_mtap_ether(sc-sc_if.if_bpf, m, BPF_DIRECTION_IN);
@@ -1342,35 +1342,8 @@ bridge_input(struct ifnet *ifp, struct e
 
bridge_span(sc, m);
 
-   m = bridge_dispatch(ifl, ifp, m);
-
-#if NVLAN  0
-   if ((m != NULL)  ((m-m_flags  M_VLANTAG) ||
-   etype == ETHERTYPE_VLAN || etype == ETHERTYPE_QINQ)) {
-   /* The bridge did not want the vlan frame either, drop it. */
-   ifp-if_noproto++;
-   m_freem(m);
-   m = NULL;
-   }
-#endif /* NVLAN  0 */
-
-   return (m);
-}
-
-struct mbuf *
-bridge_dispatch(struct bridge_iflist *ifl, struct 

Re: Simple upd(4) sensors

2015-06-10 Thread Martin Pieuchot
On 02/06/15(Tue) 22:36, David Higgs wrote:
 Here are some new sensors for upd(4) devices.  All exist on my device except 
 AtRateTimeToEmpty, which still seemed a logical addition given that 
 AtRateTimeToFull is already present.
 - AtRateTimeToEmpty
 - RunTimeToEmpty
 - NeedReplacement
 - Overload

Nice.

 If anyone had an AtRate sensor, it was probably producing meaningless output. 
  The relevant spec [0] indicates that these are in minutes, and my device 
 appears to be using seconds; the (previously unscaled) sensor value expects 
 nanoseconds!
 [0] http://www.usb.org/developers/hidpage/pdcv10.pdf
 
 And lastly, the NeedReplacement report has nothing to do with the System 
 Management Bus (SMB), so rename the constant.  Nothing else in the tree 
 appears to use it, so hopefully this is safe.
 
 Feedback and lsusb -v output is welcome, as usual.

Have you got any feedback from upd(4) users?

 --- a/upd.c
 +++ b/upd.c
 @@ -66,7 +66,13 @@ static struct upd_usage_entry upd_usage_
   { HUP_BATTERY,  HUB_DISCHARGING,
   SENSOR_INDICATOR,Discharging },
   { HUP_BATTERY,  HUB_ATRATE_TIMETOFULL,
 - SENSOR_TIMEDELTA,AtRateTimeToFull }
 + SENSOR_TIMEDELTA,AtRateTimeToFull },
 + { HUP_BATTERY,  HUB_ATRATE_TIMETOEMPTY,
 + SENSOR_TIMEDELTA,AtRateTimeToEmpty },
 + { HUP_BATTERY,  HUB_RUNTIMETO_EMPTY,
 + SENSOR_TIMEDELTA,RunTimeToEmpty },
 + { HUP_BATTERY,  HUB_NEED_REPLACEMENT,
 + SENSOR_INDICATOR,NeedReplacement },
  };
  static struct upd_usage_entry upd_usage_roots[] = {
   { HUP_BATTERY,  HUB_BATTERY_PRESENT,
 @@ -75,7 +81,9 @@ static struct upd_usage_entry upd_usage_
   { HUP_POWER,HUP_SHUTDOWN_IMMINENT,
   SENSOR_INDICATOR,ShutdownImminent },
   { HUP_BATTERY,  HUB_AC_PRESENT,
 - SENSOR_INDICATOR,ACPresent }
 + SENSOR_INDICATOR,ACPresent },
 + { HUP_POWER,HUP_OVERLOAD,
 + SENSOR_INDICATOR,Overload },
  };
  #define UPD_MAX_SENSORS  (nitems(upd_usage_batdep) + 
 nitems(upd_usage_roots))
  
 @@ -410,6 +418,12 @@ upd_sensor_update(struct upd_softc *sc,
   case HUB_FULLCHARGE_CAPACITY:
   adjust = 1000; /* scale adjust */
   break;
 + case HUB_ATRATE_TIMETOFULL:
 + case HUB_ATRATE_TIMETOEMPTY:
 + case HUB_RUNTIMETO_EMPTY:
 + /* spec says minutes, not seconds */
 + adjust = 10LL;
 + break;
   default:
   adjust = 1; /* no scale adjust */
   break;
 --- a/usbhid.h
 +++ b/usbhid.h
 @@ -213,7 +213,7 @@ struct usb_hid_descriptor {
  #define HUB_CONDITIONING_FLAG0x0048
  #define HUB_ATRATE_OK0x0049
  #define HUB_SMB_ERROR_CODE   0x004a
 -#define HUB_SMB_NEED_REPLACE 0x004b
 +#define HUB_NEED_REPLACEMENT 0x004b
  #define HUB_ATRATE_TIMETOFULL0x0060
  #define HUB_ATRATE_TIMETOEMPTY   0x0061
  #define HUB_AVERAGE_CURRENT  0x0062
 
 



RTF_LOCAL and permanent ARP

2015-06-04 Thread Martin Pieuchot
I'd like to put the link-layer address back into the gateway field of
RTF_LOCAL addresses.  The problem is that RTF_LOCAL routes are also
marked as RTF_LLINFO and a lot of code assumes (correctly) that such
routes contain valid ARP or ND information.

I believe we decided to use an ``empty'' lladdr because previously all
the routes created via rt_ifa_add(9) were using the same code and we
needed the exact same gateway to remove MPATH routes.  But now that
only RTF_LOCAL routes use this code and taking into consideration that
such routes *cannot* be MPATH, we can simply use ifp->if_sadl instead
of a blank sockaddr_dl.

This should also fix the (incomplete) output in arp(8) and ndp(8).

Ok?

Index: net/route.c
===
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.212
diff -u -p -r1.212 route.c
--- net/route.c 26 May 2015 12:19:51 -  1.212
+++ net/route.c 4 Jun 2015 10:03:51 -
@@ -1121,27 +1121,23 @@ rt_maskedcopy(struct sockaddr *src, stru
 int
 rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst)
 {
+   struct ifnet*ifp = ifa-ifa_ifp;
struct rtentry  *rt, *nrt = NULL;
struct sockaddr_rtlabel  sa_rl;
-   struct sockaddr_dl   sa_dl = { sizeof(sa_dl), AF_LINK };
struct rt_addrinfo   info;
-   u_short  rtableid = ifa-ifa_ifp-if_rdomain;
-   u_int8_t prio = ifa-ifa_ifp-if_priority + RTP_STATIC;
+   u_short  rtableid = ifp-if_rdomain;
+   u_int8_t prio = ifp-if_priority + RTP_STATIC;
int  error;
 
-   sa_dl.sdl_type = ifa-ifa_ifp-if_type;
-   sa_dl.sdl_index = ifa-ifa_ifp-if_index;
-
memset(info, 0, sizeof(info));
info.rti_ifa = ifa;
info.rti_flags = flags | RTF_MPATH;
info.rti_info[RTAX_DST] = dst;
if (flags  RTF_LLINFO)
-   info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sa_dl;
+   info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)ifp-if_sadl;
else
info.rti_info[RTAX_GATEWAY] = ifa-ifa_addr;
-   info.rti_info[RTAX_LABEL] =
-   rtlabel_id2sa(ifa-ifa_ifp-if_rtlabelid, sa_rl);
+   info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp-if_rtlabelid, sa_rl);
 
 #ifdef MPLS
if ((flags  RTF_MPLS) == RTF_MPLS) {
@@ -1189,14 +1185,14 @@ rt_ifa_add(struct ifaddr *ifa, int flags
 int
 rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst)
 {
+   struct ifnet*ifp = ifa-ifa_ifp;
struct rtentry  *rt, *nrt = NULL;
struct mbuf *m = NULL;
struct sockaddr *deldst;
struct rt_addrinfo   info;
struct sockaddr_rtlabel  sa_rl;
-   struct sockaddr_dl   sa_dl = { sizeof(sa_dl), AF_LINK };
-   u_short  rtableid = ifa-ifa_ifp-if_rdomain;
-   u_int8_t prio = ifa-ifa_ifp-if_priority + RTP_STATIC;
+   u_short  rtableid = ifp-if_rdomain;
+   u_int8_t prio = ifp-if_priority + RTP_STATIC;
int  error;
 
 #ifdef MPLS
@@ -1227,19 +1223,13 @@ rt_ifa_del(struct ifaddr *ifa, int flags
}
}
 
-   sa_dl.sdl_type = ifa-ifa_ifp-if_type;
-   sa_dl.sdl_index = ifa-ifa_ifp-if_index;
-
memset(info, 0, sizeof(info));
info.rti_ifa = ifa;
info.rti_flags = flags;
info.rti_info[RTAX_DST] = dst;
-   if (flags  RTF_LLINFO)
-   info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sa_dl;
-   else
+   if ((flags  RTF_LLINFO) == 0)
info.rti_info[RTAX_GATEWAY] = ifa-ifa_addr;
-   info.rti_info[RTAX_LABEL] =
-   rtlabel_id2sa(ifa-ifa_ifp-if_rtlabelid, sa_rl);
+   info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp-if_rtlabelid, sa_rl);
 
if ((flags  RTF_HOST) == 0)
info.rti_info[RTAX_NETMASK] = ifa-ifa_netmask;
Index: netinet6/nd6.c
===
RCS file: /cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.136
diff -u -p -r1.136 nd6.c
--- netinet6/nd6.c  15 May 2015 12:00:57 -  1.136
+++ netinet6/nd6.c  4 Jun 2015 09:51:54 -
@@ -651,7 +651,6 @@ nd6_lookup(struct in6_addr *addr6, int c
}
if (!rt) {
if (create  ifp) {
-   struct sockaddr_dl sa_dl = { sizeof(sa_dl), AF_LINK };
struct rt_addrinfo info;
int e;
 
@@ -667,9 +666,6 @@ nd6_lookup(struct in6_addr *addr6, int c
if (ifa == NULL)
return (NULL);
 
-   sa_dl.sdl_type = ifp-if_type;
-   sa_dl.sdl_index = ifp-if_index;
-
/*
 * Create a new route.  RTF_LLINFO is necessary
 * to 

Re: Conver bridge(4) to if_input()

2015-06-22 Thread Martin Pieuchot
On 22/06/15(Mon) 17:12, Alexander Bluhm wrote:
 On Wed, Jun 17, 2015 at 02:08:14PM +0200, Martin Pieuchot wrote:
  diff -u -p -r1.340 if.c
  --- net/if.c16 Jun 2015 11:09:39 -  1.340
  +++ net/if.c17 Jun 2015 12:03:36 -
  @@ -530,6 +530,15 @@ if_input_process(void *xmq)
  continue;
  }
   
  +#if NBRIDGE  0
  +   if (ifp-if_bridgeport  (m-m_flags  M_PROTO1) == 0) {
  +   m = bridge_input(m);
  +   if (m == NULL)
  +   continue;
  +   }
  +   m-m_flags = ~M_PROTO1;/* Loop prevention */
  +#endif
 
 Should we reset the loop prevention only if our call to bridge_input()
 did set M_PROTO1?  Something like this
 
   if (ifp-if_bridgeport  (m-m_flags  M_PROTO1) == 0) {
   m = bridge_input(m);
   if (m == NULL)
   continue;
   m-m_flags = ~M_PROTO1;/* Loop prevention */
   }

Yes and no :)

bridge_input() will set M_PROTO1 on the mbuf copies that it enqueues on
its ports.

If you receive a packet on em0 in bridge0 with tun0, you want to call
bridge_input() only once, but you'll call if_input() in em0 and tun0.

So the first packet will enter if_input() without M_PROTO1, go through
bridge_input() then be processed by the stack.  Then the copy of this
packet created in bridge_input() will have the M_PROTO1 flag set and
when it will be dequeued by if_input() it won't be passed to 
bridge_input() again.

Does that make sense?



Stop G/C mbufs in if_detach()

2015-06-23 Thread Martin Pieuchot
When an interface is detached or destroyed the CPU executing if_detach()
removes all the mbufs received by this interface on three queues:
ARP, IPv4 and IPv6 protocol queues.

This made sense to avoid referencing a dangling rcvif pointer. But now
mbufs contain unique interface indexes and protocol interrupt routines
handle just fine the case where if_get() returns a NULL pointer.

So I'd like to get rid of this explicit garbage collection.  Note that
this will leave mbufs on the protocol queues until the next netisr is
executed for the corresponding queue.  This is a functional change but
I don't think it's a problem.  It only matters if you destroy your only
pseudo interface or unplug your single USB interface without replugging
it.

Comments, oks?

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.341
diff -u -p -r1.341 if.c
--- net/if.c23 Jun 2015 09:42:23 -  1.341
+++ net/if.c23 Jun 2015 10:54:04 -
@@ -128,8 +128,6 @@ voidif_attachsetup(struct ifnet *);
 void   if_attachdomain1(struct ifnet *);
 void   if_attach_common(struct ifnet *);
 
-intif_detach_filter(void *, const struct mbuf *);
-void   if_detach_queues(struct ifnet *, struct niqueue *);
 void   if_detached_start(struct ifnet *);
 intif_detached_ioctl(struct ifnet *, u_long, caddr_t);
 
@@ -644,23 +642,6 @@ if_detach(struct ifnet *ifp)
pfi_detach_ifnet(ifp);
 #endif
 
-   /*
-* remove packets came from ifp, from software interrupt queues.
-* net/netisr_dispatch.h is not usable, as some of them use
-* strange queue names.
-*/
-#define IF_DETACH_QUEUES(x) \
-do { \
-   extern struct niqueue x; \
-   if_detach_queues(ifp,  x); \
-} while (0)
-   IF_DETACH_QUEUES(arpintrq);
-   IF_DETACH_QUEUES(ipintrq);
-#ifdef INET6
-   IF_DETACH_QUEUES(ip6intrq);
-#endif
-#undef IF_DETACH_QUEUES
-
/* Remove the interface from the list of all interfaces.  */
TAILQ_REMOVE(ifnet, ifp, if_list);
if (ISSET(ifp-if_xflags, IFXF_TXREADY))
@@ -701,34 +682,6 @@ do { \
 
ifindex2ifnet[ifp-if_index] = NULL;
splx(s);
-}
-
-int
-if_detach_filter(void *ctx, const struct mbuf *m)
-{
-   struct ifnet *ifp = ctx;
-
-#ifdef DIAGNOSTIC
-   if ((m-m_flags  M_PKTHDR) == 0)
-   return (0);
-#endif
-
-   return (m-m_pkthdr.ph_ifidx == ifp-if_index);
-}
-
-void
-if_detach_queues(struct ifnet *ifp, struct niqueue *niq)
-{
-   struct mbuf *m0, *m;
-
-   m0 = niq_filter(niq, if_detach_filter, ifp);
-   while (m0 != NULL) {
-   m = m0;
-   m0 = m-m_nextpkt;
-
-   m-m_nextpkt = NULL;
-   m_freem(m);
-   }
 }
 
 /*



Remove a #if NCARP hack

2015-06-24 Thread Martin Pieuchot
Time goes by and things must be cleaned.  Thanks to claudio@'s work
to support multiple connected routes, carp(4) now has its own default
priority.  So I audited the remaining iterations on ifnet and I
couldn't find any good reason to force carp(4) interfaces at a special
position in the list of interfaces.

OpenBSD's network stack changed in the past years and ifa_ifwithnet()
is now only used in a few places that should not matter.  Maybe a
brave soul (anyone?) will even get rid of this function completely.

The other place where this order could matter is in6_ifawithscope().
But stsp@ added an explicit check for carp(4) interfaces last October. 

Remember also that since 5.7 the ``carpdev'' argument is mandatory. So
you're rather unlikely to have a carp(4) interface inserted in ifnet
before its parent interface.

Finally I sleep better with fewer #if PSEUDOFROG in the stack :)

Ok?

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.333
diff -u -p -r1.333 if.c
--- net/if.c20 May 2015 08:28:54 -  1.333
+++ net/if.c26 May 2015 08:45:51 -
@@ -373,24 +373,8 @@ if_attachhead(struct ifnet *ifp)
 void
 if_attach(struct ifnet *ifp)
 {
-#if NCARP  0
-   struct ifnet *before = NULL;
-#endif
-
if_attach_common(ifp);
-
-#if NCARP  0
-   if (ifp-if_type != IFT_CARP)
-   TAILQ_FOREACH(before, ifnet, if_list)
-   if (before-if_type == IFT_CARP)
-   break;
-   if (before == NULL)
-   TAILQ_INSERT_TAIL(ifnet, ifp, if_list);
-   else
-   TAILQ_INSERT_BEFORE(before, ifp, if_list);
-#else
TAILQ_INSERT_TAIL(ifnet, ifp, if_list);
-#endif
if_attachsetup(ifp);
 }
 



Re: bridge_output() without m_buf_tag

2015-06-24 Thread Martin Pieuchot
On 17/06/15(Wed) 14:07, Martin Pieuchot wrote:
 On 08/06/15(Mon) 15:58, Martin Pieuchot wrote:
  Diff below moves bridge_output() to if_output().  It fixes the case I
  already described some weeks ago where you have a physical interface
  in a bridge and a vlan on top of it which is not in the bridge.
  
  It also changes the loop prevention code to use M_PROTO1 like in the
  input path.
  
  Tests, comments and oks welcome.
 
 Updated diff to match the recent if_get() change.  I've got one positive
 report so far, any ok?

I'm still looking for oks.

 
 Index: net/if.c
 ===
 RCS file: /cvs/src/sys/net/if.c,v
 retrieving revision 1.340
 diff -u -p -r1.340 if.c
 --- net/if.c  16 Jun 2015 11:09:39 -  1.340
 +++ net/if.c  17 Jun 2015 12:01:12 -
 @@ -449,6 +449,19 @@ if_output(struct ifnet *ifp, struct mbuf
   int s, length, error = 0;
   unsigned short mflags;
  
 +#ifdef DIAGNOSTIC
 + if (ifp-if_rdomain != rtable_l2(m-m_pkthdr.ph_rtableid)) {
 + printf(%s: trying to send packet on wrong domain. 
 + if %d vs. mbuf %d\n, ifp-if_xname, ifp-if_rdomain,
 + rtable_l2(m-m_pkthdr.ph_rtableid));
 + }
 +#endif
 +
 +#if NBRIDGE  0
 + if (ifp-if_bridgeport  (m-m_flags  M_PROTO1) == 0)
 + return (bridge_output(ifp, m, NULL, NULL));
 +#endif
 +
   length = m-m_pkthdr.len;
   mflags = m-m_flags;
  
 Index: net/if_bridge.c
 ===
 RCS file: /cvs/src/sys/net/if_bridge.c,v
 retrieving revision 1.244
 diff -u -p -r1.244 if_bridge.c
 --- net/if_bridge.c   16 Jun 2015 11:09:39 -  1.244
 +++ net/if_bridge.c   17 Jun 2015 12:01:12 -
 @@ -2635,10 +2635,12 @@ bridge_ifenqueue(struct bridge_softc *sc
  {
   int error, len;
  
 + /* Loop prevention. */
 + m-m_flags |= M_PROTO1;
 +
  #if NGIF  0
   /* Packet needs etherip encapsulation. */
   if (ifp-if_type == IFT_GIF) {
 - m-m_flags |= M_PROTO1;
  
   /* Count packets input into the gif from outside */
   ifp-if_ipackets++;
 Index: net/if_ethersubr.c
 ===
 RCS file: /cvs/src/sys/net/if_ethersubr.c,v
 retrieving revision 1.205
 diff -u -p -r1.205 if_ethersubr.c
 --- net/if_ethersubr.c16 Jun 2015 11:09:39 -  1.205
 +++ net/if_ethersubr.c17 Jun 2015 12:01:12 -
 @@ -181,15 +181,6 @@ ether_output(struct ifnet *ifp, struct m
   struct arpcom *ac = (struct arpcom *)ifp;
   int error = 0;
  
 -#ifdef DIAGNOSTIC
 - if (ifp-if_rdomain != rtable_l2(m-m_pkthdr.ph_rtableid)) {
 - printf(%s: trying to send packet on wrong domain. 
 - if %d vs. mbuf %d, AF %d\n, ifp-if_xname,
 - ifp-if_rdomain, rtable_l2(m-m_pkthdr.ph_rtableid),
 - dst-sa_family);
 - }
 -#endif
 -
   esrc = ac-ac_enaddr;
  
   if ((ifp-if_flags  (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
 @@ -276,47 +267,6 @@ ether_output(struct ifnet *ifp, struct m
   eh-ether_type = etype;
   memcpy(eh-ether_dhost, edst, sizeof(eh-ether_dhost));
   memcpy(eh-ether_shost, esrc, sizeof(eh-ether_shost));
 -
 -#if NBRIDGE  0
 - /*
 -  * Interfaces that are bridgeports need special handling for output.
 -  */
 - if (ifp-if_bridgeport) {
 - struct m_tag *mtag;
 -
 - /*
 -  * Check if this packet has already been sent out through
 -  * this bridgeport, in which case we simply send it out
 -  * without further bridge processing.
 -  */
 - for (mtag = m_tag_find(m, PACKET_TAG_BRIDGE, NULL); mtag;
 - mtag = m_tag_find(m, PACKET_TAG_BRIDGE, mtag)) {
 -#ifdef DEBUG
 - /* Check that the information is there */
 - if (mtag-m_tag_len != sizeof(caddr_t)) {
 - error = EINVAL;
 - goto bad;
 - }
 -#endif
 - if (!memcmp(ifp-if_bridgeport, mtag + 1,
 - sizeof(caddr_t)))
 - break;
 - }
 - if (mtag == NULL) {
 - /* Attach a tag so we can detect loops */
 - mtag = m_tag_get(PACKET_TAG_BRIDGE, sizeof(caddr_t),
 - M_NOWAIT);
 - if (mtag == NULL) {
 - error = ENOBUFS;
 - goto bad;
 - }
 - memcpy(mtag + 1, ifp-if_bridgeport, sizeof(caddr_t));
 - m_tag_prepend(m, mtag);
 - error = bridge_output(ifp, m, NULL, NULL);
 - return (error);
 - }
 - }
 -#endif
  
   return (if_output(ifp, m));
  bad:
 Index: sys/mbuf.h

Re: macppc IPI counter

2015-06-24 Thread Martin Pieuchot
On 24/06/15(Wed) 16:45, Mark Kettenis wrote:
  Date: Wed, 24 Jun 2015 16:11:08 +0200
  From: Martin Pieuchot m...@openbsd.org
  
  Use only one ipi counter just like other archs do.
  
  ok?
 
 Problem is that the event counters aren't really MP safe.  By keeping
 them per-CPU you circumvent any problems with that.

Do you suggest that I add another 4 counters for my CPUs #2 and #3?

Honestly the trashing is much more noticeable on the clock counter which
is also not protected.  But as you know counters are 64bit long so we can't
easily use atomic operations like on sparc64.  Plus if I'm not mistaken
i386 and amd64 are already doing that...

If you have a suggestion for a correct fix, I'm interested, but I
believe this should be done anyway.



macppc IPI counter

2015-06-24 Thread Martin Pieuchot
Use only one ipi counter just like other archs do.

ok?

Index: dev/openpic.c
===
RCS file: /cvs/src/sys/arch/macppc/dev/openpic.c,v
retrieving revision 1.81
diff -u -p -r1.81 openpic.c
--- dev/openpic.c   24 Jun 2015 11:58:06 -  1.81
+++ dev/openpic.c   24 Jun 2015 13:47:34 -
@@ -130,11 +130,9 @@ void   openpic_ipi_ddb(void);
 #define IPI_VECTOR_NOP 64
 #define IPI_VECTOR_DDB 65
 
-static struct evcount ipi_ddb[PPC_MAXPROCS];
-static struct evcount ipi_nop[PPC_MAXPROCS];
+static struct evcount ipi_count;
 
-static int ipi_nopirq = IPI_VECTOR_NOP;
-static int ipi_ddbirq = IPI_VECTOR_DDB;
+static int ipi_irq = IPI_VECTOR_NOP;
 
 intr_send_ipi_t openpic_send_ipi;
 #endif /* MULTIPROCESSOR */
@@ -288,11 +286,7 @@ openpic_attach(struct device *parent, st
x |= 15  OPENPIC_PRIORITY_SHIFT;
openpic_write(OPENPIC_IPI_VECTOR(1), x);
 
-   /* XXX - ncpus */
-   evcount_attach(ipi_nop[0], ipi_nop0, ipi_nopirq);
-   evcount_attach(ipi_nop[1], ipi_nop1, ipi_nopirq);
-   evcount_attach(ipi_ddb[0], ipi_ddb0, ipi_ddbirq);
-   evcount_attach(ipi_ddb[1], ipi_ddb1, ipi_ddbirq);
+   evcount_attach(ipi_count, ipi, ipi_irq);
 #endif
 
/* clear all pending interrunts */
@@ -638,16 +632,11 @@ openpic_ext_intr(void)
return;
}
 #ifdef MULTIPROCESSOR
-   if (irq == IPI_VECTOR_NOP) {
-   ipi_nop[ci-ci_cpuid].ec_count++;
+   if (irq == IPI_VECTOR_NOP || irq == IPI_VECTOR_DDB) {
+   ipi_count.ec_count++;
openpic_eoi(ci-ci_cpuid);
-   irq = openpic_read_irq(ci-ci_cpuid);
-   continue;
-   }
-   if (irq == IPI_VECTOR_DDB) {
-   ipi_ddb[ci-ci_cpuid].ec_count++;
-   openpic_eoi(ci-ci_cpuid);
-   openpic_ipi_ddb();
+   if (irq == IPI_VECTOR_DDB)
+   openpic_ipi_ddb();
irq = openpic_read_irq(ci-ci_cpuid);
continue;
}
@@ -758,7 +747,6 @@ openpic_send_ipi(struct cpu_info *ci, in
 void
 openpic_ipi_ddb(void)
 {
-   DPRINTF(ipi_ddb() called\n);
 #ifdef DDB
Debugger();
 #endif



Re: umass quirk for ignoring residue?

2015-06-22 Thread Martin Pieuchot
On 20/06/15(Sat) 18:50, frantisek holop wrote:
 trying to use a replacement usb enclosure for an old 2.5 IDE drive.
 [...] 
 Controller /dev/usb0:
 addr 1: high speed, self powered, config 1, EHCI root hub(0x), 
 Intel(0x8086), rev 1.00
  port 2 addr 6: high speed, self powered, config 1, USB 2.0  IDE 
 DEVICE(0x6600), Super Top(0x14cd), rev 2.01, iSerialNumber ??
 
 apparently the USB 2.0  IDE DEVICE(0x6600) from Super Top(0x14cd)
 is a well known offender...
 
 netbsd/dev/usb: (in usbdevs but no quirk)
   usbdevs
   504:vendor SUPERTOP  0x14cd  SuperTop
   3116:product SUPERTOP IDEBRIDGE  0x6600  SuperTop IDE Bridge
 
 
 freebsd/dev/usb:
   usbdevs
   649:vendor SUPERTOP  0x14cd  Super Top
   4316:product SUPERTOP IDE0x6600  USB-IDE
 
   quirk/usb_quirk.c
   410: USB_QUIRK(SUPERTOP, IDE, 0x, 0x, UQ_MSC_IGNORE_RESIDUE,
   411: UQ_MSC_NO_SYNC_CACHE),
 
 
 linux/drivers/usb:
   storage/unusual_devs.h
   1976:UNUSUAL_DEV(  0x14cd, 0x6600, 0x0201, 0x0201,
   1977:Super Top,
   1978:IDE DEVICE,
   1979:USB_SC_DEVICE, USB_PR_DEVICE, NULL,
   1980:US_FL_IGNORE_RESIDUE ),
 
 
 trying to compare the different kernels re: ignoring residue,
 in openbsd ADEV_NOSENSE seemed like the way to go:

I believe it's the way to go.

 Index: umass_quirks.c
 ===
 RCS file: /cvs/src/sys/dev/usb/umass_quirks.c,v
 retrieving revision 1.31
 diff -u -p -r1.31 umass_quirks.c
 --- umass_quirks.c5 Oct 2014 08:34:14 -   1.31
 +++ umass_quirks.c20 Jun 2015 16:24:11 -
 @@ -465,6 +465,14 @@ const struct umass_quirk umass_quirks[] 
UMATCH_VENDOR_PRODUCT,
NULL, NULL
   },
 +
 + { { USB_VENDOR_SUPERTOP, USB_PRODUCT_SUPERTOP_IDEBRIDGE },
 +  UMASS_WPROTO_UNSPEC, UMASS_CPROTO_UNSPEC,
 +  0,
 +  ADEV_NOSENSE,
 +  UMATCH_VENDOR_PRODUCT,
 +  NULL, NULL
 + },
  };
  
  const struct umass_quirk *
 Index: usbdevs
 ===
 RCS file: /cvs/src/sys/dev/usb/usbdevs,v
 retrieving revision 1.651
 diff -u -p -r1.651 usbdevs
 --- usbdevs   16 Jun 2015 05:07:25 -  1.651
 +++ usbdevs   20 Jun 2015 16:24:11 -
 @@ -534,6 +534,7 @@ vendor SILICOM0x1485  Silicom
  vendor RALINK0x148f  Ralink Technology
  vendor STARTECH  0x14b0  StarTech.com
  vendor CONCEPTRONIC2 0x14b2  Conceptronic
 +vendor SUPERTOP  0x14cd  SuperTop
  vendor PLANEX3   0x14ea  Planex Communications
  vendor SILICONPORTALS0x1527  Silicon Portals
  vendor UBLOX 0x1546  U-blox
 @@ -4058,6 +4059,9 @@ product SUNTAC VS10U0x0009  Slipper U
  product SUNTAC IS96U 0x000a  Ir-Trinity
  product SUNTAC AS64LX0x000b  U-Cable type A3
  product SUNTAC AS144L4   0x0011  U-Cable type A4
 +
 +/* SuperTop products */
 +product SUPERTOP IDEBRIDGE   0x6600  SuperTop IDE Bridge
  
  /* System TALKS, Inc. */
  product  SYSTEMTALKS SGCX2UL 0x1920  SGC-X2UL
 
 
 but this did not seem to help.
 
 here is the output from usb/umass debug enabled kernel with _only_
 the usbdevs part of the patch, not the quirk.
 connecting the drive, waiting a couple of seconds
 and issuing sudo fdisk sd1:
 
 Jun 20 17:55:13 hatvan /bsd: umass0 at uhub0
 Jun 20 17:55:13 hatvan /bsd:  port 2 configuration 1 interface 0 Super Top 
 USB 2.0  IDE DEVICE rev 2.00/2.01 addr 6
 Jun 20 17:55:13 hatvan /bsd: umass0: using SCSI over Bulk-Only
 Jun 20 17:55:14 hatvan /bsd: umass0: Get Max Lun
 Jun 20 17:55:14 hatvan /bsd: umass0: Max Lun 0
 Jun 20 17:55:14 hatvan /bsd: umass0: opening iface 0xd3364360 epaddr 2 for 
 BULKOUT
 Jun 20 17:55:14 hatvan /bsd: umass0: opening iface 0xd3364360 epaddr 129 for 
 BULKIN
 Jun 20 17:55:14 hatvan /bsd: umass0: umass_attach_bus: SCSI
 Jun 20 17:55:14 hatvan /bsd: sc = 0x0xd3361400, scbus = 0x0xd3362800
 Jun 20 17:55:14 hatvan /bsd: scsibus4 at umass0: 2 targets, initiator 0
 Jun 20 17:55:14 hatvan /bsd: umass0: umass_scsi_cmd: at 1434815714.000358: 
 1:0 xs=0xd9181000 cmd=0x00 datalen=0 (quirks=0x400e, poll=0)
 Jun 20 17:55:14 hatvan /bsd: umass_scsi_cmd: async dir=0, cmdlen=6 datalen=0
 Jun 20 17:55:14 hatvan /bsd: umass0: umass_bbb_transfer cmd=0x00
 Jun 20 17:55:14 hatvan /bsd: umass0: CBW 158: cmdlen=6 
 (0x), data = 0 bytes, dir = out
 Jun 20 17:55:14 hatvan /bsd: umass0: start xfer buffer=0xd3361464 buflen=31 
 flags=0x0 timeout=15000
 Jun 20 17:55:14 hatvan /bsd: umass0: Handling BBB state 1 (BBB CBW), 
 xfer=0xd9183460, NORMAL_COMPLETION
 Jun 20 17:55:14 hatvan /bsd: umass0: no data phase
 Jun 20 17:55:14 hatvan /bsd: umass0: start xfer buffer=0xd3361483 buflen=13 
 flags=0x0 timeout=15000
 Jun 20 17:55:14 hatvan /bsd: umass0: Handling BBB state 4 (BBB CSW, 1st 
 

Re: SMP steroids for PF

2015-06-26 Thread Martin Pieuchot
On 26/06/15(Fri) 16:00, Alexandr Nedvedicky wrote:
 Hello Martin,
 
 I accept or your comments. I just have few quick notes/questions now.
 
  2)  I saw that you found some ALTQ leftovers, you have some Solaris
 (2) I think ALTQs leftovers are still in CVS repo, will double check
 anyway. Stack alignment is not Solaris compatibility hack it's sparc
 compatibility. May be your C compiler takes care of this and grants
 16/32/64 bit stack alignment. I have not examined build process
 that closely yet.

By Solaris compatibility I'm referring to the size of ``sa_family_t''
and the corresponding changes in struct pfr_table.

 (3)
  use atomic operations rather than per-CPU counters or any other
  solution?  I'm also raising this question because some counters are
 can you point me to manual page or source code sample so I can have a look how
 to use per-CPU counter?

There's no such manual.  I was more asking about the reason for using
atomic operations.  Is it because you're trying to use existing APIs?
Are the macros implemented differently in Solaris?

  5)  I'm not sure to understand the goal of the new pf_refcnt_t type
 (5) Solaris defines pf_refcnt_t as 64-bit unsigned integer, pf_refcnt_t hopes
 to make porting easier. It can be defined as 32-bit on 32-bit machines.

Using a long on OpenBSD will guarantee that the value fits in a register,
so it should be fine.

  7)  The PF_SMP_INSERT_WQ() macro to replace SLIST_INSERT() seems over-
 PF_SMP_INSERT_WQ() purpose of those is to allow every CPU/thread to
 operate on its own work-queue of ktables/kentries. The current pf
 uses 'intrusive' link members pfrkt_workq/pfrke_workq in 
 pfr_ktable/pfr_kentry.
 The only idea is to stay as much close to current version as possible.

I understand that you want to stay close to the current version.  I'm
just saying that we can also modify the current version to reduce the
size of your diff.

Regards,
Martin



Re: SMP steroids for PF

2015-06-26 Thread Martin Pieuchot
On 26/06/15(Fri) 17:19, Alexandr Nedvedicky wrote:
 On Fri, Jun 26, 2015 at 04:34:06PM +0200, Martin Pieuchot wrote:
  On 26/06/15(Fri) 16:00, Alexandr Nedvedicky wrote:
2)  I saw that you found some ALTQ leftovers, you have some Solaris
   (2) I think ALTQs leftovers are still in CVS repo, will double check
   anyway. Stack alignment is not Solaris compatibility hack it's sparc
   compatibility. May be your C compiler takes care of this and grants
   16/32/64 bit stack alignment. I have not examined build process
   that closely yet.
  
  By Solaris compatibility I'm referring to the size of ``sa_family_t''
  and the corresponding changes in struct pfr_table.
  
 I see. sa_family_t is kind of surprise it's defined as uint16_t on Solaris.
 PF at various places mixes sa_family_t with u_int8_t, so all af variables
 on Solaris had to be turned to sa_family_t. Some of those changes leaked
 back during merge to current.

Even if on OpenBSD sa_family_t is defined as uint8_t, I'd argue that
for portability reasons we should use the correct type where it is
appropriate.  And here your leak is a good example of portability.

If you have some changes that could improve the portability of the
software and if you feel like sharing them, I'd be interested.

Regards,
Martin



Double free in trunk(4)

2015-06-10 Thread Martin Pieuchot
During clone/destroy stress tests on pseudo-interfaces I found a double
free easily reproducible with dhclient(8) running on top of a trunk(4).

The problem comes from trunk_ether_delmulti() which is almost identical
to carp's version except that it always frees mc.

So when you do # ifconfig trunk0 destroy the kernel first brings the
interface down and generates a routing message.  dhclient(8) receives
this message and removes the address it's configured on trunk0 and with
it the default multicast group.  If dhclient(8) loses the race, it will
fail to remove the multicast address but it will free mc!

Then ifconfig's thread which was asleep in trunk_ether_purgemulti() ->
trunk_ioctl_allports() wakes up and tries to free mc a second time:

 uvm_fault(0xd5647bd0, 0x0, 0, 1) - e
 kernel: page fault trap, code=0
 Stopped at  trunk_ether_purgemulti+0xc2:movl0x104(%edx),%eax
 ddb tr
 trunk_ether_purgemulti(d131b800,d131b800,0,d041d091,0) at trunk_ether_purgemult
 i+0xc2
 trunk_clone_destroy(d131b800,0,f3766dec,d1064e94,f3766ef4) at trunk_clone_destr
 oy+0x16
 ifioctl(d54d10f0,80206979,f3766e84,d54a42dc,2) at ifioctl+0x232
 sys_ioctl(d54a42dc,f3766f60,f3766f80,d0566f17,d54a42dc) at sys_ioctl+0x257
 syscall() at syscall+0x247

Here's a simple fix that also reduces the differences with carp's
version.  ok?

Index: net/if_trunk.c
===
RCS file: /cvs/src/sys/net/if_trunk.c,v
retrieving revision 1.101
diff -u -p -r1.101 if_trunk.c
--- net/if_trunk.c  9 Jun 2015 14:50:14 -   1.101
+++ net/if_trunk.c  10 Jun 2015 15:30:10 -
@@ -857,18 +857,20 @@ trunk_ether_delmulti(struct trunk_softc 
if ((error = ether_delmulti(ifr, tr-tr_ac)) != ENETRESET)
return (error);
 
-   if ((error = trunk_ioctl_allports(tr, SIOCDELMULTI,
-   (caddr_t)ifr)) != 0) {
+   /* We no longer use this multicast address.  Tell parent so. */
+   error = trunk_ioctl_allports(tr, SIOCDELMULTI, (caddr_t)ifr);
+   if (error == 0) {
+   SLIST_REMOVE(tr-tr_mc_head, mc, trunk_mc, mc_entries);
+   free(mc, M_DEVBUF, sizeof(*mc));
+   } else {
/* XXX At least one port failed to remove the address */
if (tr-tr_ifflags  IFF_DEBUG) {
printf(%s: failed to remove multicast address 
-   on all ports\n, tr-tr_ifname);
+   on all ports (%d)\n, tr-tr_ifname, error);
}
+   (void)ether_addmulti(ifr, tr-tr_ac);
}
 
-   SLIST_REMOVE(tr-tr_mc_head, mc, trunk_mc, mc_entries);
-   free(mc, M_DEVBUF, 0);
-
return (0);
 }
 
@@ -886,7 +888,7 @@ trunk_ether_purgemulti(struct trunk_soft
trunk_ioctl_allports(tr, SIOCDELMULTI, (caddr_t)ifr);
 
SLIST_REMOVE(tr-tr_mc_head, mc, trunk_mc, mc_entries);
-   free(mc, M_DEVBUF, 0);
+   free(mc, M_DEVBUF, sizeof(*mc));
}
 }
 



Re: bridge_output() without m_buf_tag

2015-06-17 Thread Martin Pieuchot
On 08/06/15(Mon) 15:58, Martin Pieuchot wrote:
 Diff below moves bridge_output() to if_output().  It fixes the case I
 already described some weeks ago where you have a physical interface
 in a bridge and a vlan on top of it which is not in the bridge.
 
 It also changes the loop prevention code to use M_PROTO1 like in the
 input path.
 
 Tests, comments and oks welcome.

Updated diff to match the recent if_get() change.  I've got one positive
report so far, any ok?

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.340
diff -u -p -r1.340 if.c
--- net/if.c16 Jun 2015 11:09:39 -  1.340
+++ net/if.c17 Jun 2015 12:01:12 -
@@ -449,6 +449,19 @@ if_output(struct ifnet *ifp, struct mbuf
int s, length, error = 0;
unsigned short mflags;
 
+#ifdef DIAGNOSTIC
+   if (ifp-if_rdomain != rtable_l2(m-m_pkthdr.ph_rtableid)) {
+   printf(%s: trying to send packet on wrong domain. 
+   if %d vs. mbuf %d\n, ifp-if_xname, ifp-if_rdomain,
+   rtable_l2(m-m_pkthdr.ph_rtableid));
+   }
+#endif
+
+#if NBRIDGE  0
+   if (ifp-if_bridgeport  (m-m_flags  M_PROTO1) == 0)
+   return (bridge_output(ifp, m, NULL, NULL));
+#endif
+
length = m-m_pkthdr.len;
mflags = m-m_flags;
 
Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.244
diff -u -p -r1.244 if_bridge.c
--- net/if_bridge.c 16 Jun 2015 11:09:39 -  1.244
+++ net/if_bridge.c 17 Jun 2015 12:01:12 -
@@ -2635,10 +2635,12 @@ bridge_ifenqueue(struct bridge_softc *sc
 {
int error, len;
 
+   /* Loop prevention. */
+   m-m_flags |= M_PROTO1;
+
 #if NGIF  0
/* Packet needs etherip encapsulation. */
if (ifp-if_type == IFT_GIF) {
-   m-m_flags |= M_PROTO1;
 
/* Count packets input into the gif from outside */
ifp-if_ipackets++;
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.205
diff -u -p -r1.205 if_ethersubr.c
--- net/if_ethersubr.c  16 Jun 2015 11:09:39 -  1.205
+++ net/if_ethersubr.c  17 Jun 2015 12:01:12 -
@@ -181,15 +181,6 @@ ether_output(struct ifnet *ifp, struct m
struct arpcom *ac = (struct arpcom *)ifp;
int error = 0;
 
-#ifdef DIAGNOSTIC
-   if (ifp-if_rdomain != rtable_l2(m-m_pkthdr.ph_rtableid)) {
-   printf(%s: trying to send packet on wrong domain. 
-   if %d vs. mbuf %d, AF %d\n, ifp-if_xname,
-   ifp-if_rdomain, rtable_l2(m-m_pkthdr.ph_rtableid),
-   dst-sa_family);
-   }
-#endif
-
esrc = ac-ac_enaddr;
 
if ((ifp-if_flags  (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
@@ -276,47 +267,6 @@ ether_output(struct ifnet *ifp, struct m
eh-ether_type = etype;
memcpy(eh-ether_dhost, edst, sizeof(eh-ether_dhost));
memcpy(eh-ether_shost, esrc, sizeof(eh-ether_shost));
-
-#if NBRIDGE  0
-   /*
-* Interfaces that are bridgeports need special handling for output.
-*/
-   if (ifp-if_bridgeport) {
-   struct m_tag *mtag;
-
-   /*
-* Check if this packet has already been sent out through
-* this bridgeport, in which case we simply send it out
-* without further bridge processing.
-*/
-   for (mtag = m_tag_find(m, PACKET_TAG_BRIDGE, NULL); mtag;
-   mtag = m_tag_find(m, PACKET_TAG_BRIDGE, mtag)) {
-#ifdef DEBUG
-   /* Check that the information is there */
-   if (mtag-m_tag_len != sizeof(caddr_t)) {
-   error = EINVAL;
-   goto bad;
-   }
-#endif
-   if (!memcmp(ifp-if_bridgeport, mtag + 1,
-   sizeof(caddr_t)))
-   break;
-   }
-   if (mtag == NULL) {
-   /* Attach a tag so we can detect loops */
-   mtag = m_tag_get(PACKET_TAG_BRIDGE, sizeof(caddr_t),
-   M_NOWAIT);
-   if (mtag == NULL) {
-   error = ENOBUFS;
-   goto bad;
-   }
-   memcpy(mtag + 1, ifp-if_bridgeport, sizeof(caddr_t));
-   m_tag_prepend(m, mtag);
-   error = bridge_output(ifp, m, NULL, NULL);
-   return (error);
-   }
-   }
-#endif
 
return (if_output(ifp, m));
 bad:
Index: sys/mbuf.h
===
RCS file: /cvs/src/sys

Re: Convert bridge(4) to if_input()

2015-06-17 Thread Martin Pieuchot
On 08/06/15(Mon) 16:11, Martin Pieuchot wrote:
 This is the last pseudo-driver conversion.
 
 The idea is to run bridge_input() *before* any ifih on an interface.
 Doing so allows us to remove the hack between vlan(4) and bridge(4)
 and simplify the logic for stacked ifih.
 
 With that net/if_ethersubr.c is now free from #ifdef NPSEUDODRIVER.
 
 As usual, tests, comments and oks welcome.

Updated diff to match the recent if_get() change.  As for the other bridge
diff, I've got one positive test report.

Any ok?

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.340
diff -u -p -r1.340 if.c
--- net/if.c16 Jun 2015 11:09:39 -  1.340
+++ net/if.c17 Jun 2015 12:03:36 -
@@ -530,6 +530,15 @@ if_input_process(void *xmq)
continue;
}
 
+#if NBRIDGE  0
+   if (ifp-if_bridgeport  (m-m_flags  M_PROTO1) == 0) {
+   m = bridge_input(m);
+   if (m == NULL)
+   continue;
+   }
+   m-m_flags = ~M_PROTO1;/* Loop prevention */
+#endif
+
/*
 * Pass this mbuf to all input handlers of its
 * interface until it is consumed.
Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.244
diff -u -p -r1.244 if_bridge.c
--- net/if_bridge.c 16 Jun 2015 11:09:39 -  1.244
+++ net/if_bridge.c 17 Jun 2015 12:03:36 -
@@ -116,8 +116,6 @@ voidbridge_broadcast(struct bridge_soft
 void   bridge_localbroadcast(struct bridge_softc *, struct ifnet *,
 struct ether_header *, struct mbuf *);
 void   bridge_span(struct bridge_softc *, struct mbuf *);
-struct mbuf *bridge_dispatch(struct bridge_iflist *, struct ifnet *,
-struct mbuf *);
 void   bridge_stop(struct bridge_softc *);
 void   bridge_init(struct bridge_softc *);
 intbridge_bifconf(struct bridge_softc *, struct ifbifconf *);
@@ -1198,7 +1196,7 @@ bridgeintr_frame(struct bridge_softc *sc
 * If packet is unicast, destined for someone on this
 * side of the bridge, drop it.
 */
-   if ((m-m_flags  (M_BCAST | M_MCAST)) == 0) {
+   if (!ETHER_IS_MULTICAST(eh.ether_dhost)) {
if ((dst_p = bridge_rtlookup(sc, dst)) != NULL)
dst_if = dst_p-brt_if;
else
@@ -1207,8 +1205,14 @@ bridgeintr_frame(struct bridge_softc *sc
m_freem(m);
return;
}
-   } else
+   } else {
+   if (memcmp(etherbroadcastaddr, eh.ether_dhost,
+   sizeof(etherbroadcastaddr)) == 0)
+   m-m_flags |= M_BCAST;
+   else
+   m-m_flags |= M_MCAST;
dst_if = NULL;
+   }
 
/*
 * Multicast packets get handled a little differently:
@@ -1302,37 +1306,31 @@ bridgeintr_frame(struct bridge_softc *sc
  * not for us, and schedule an interrupt.
  */
 struct mbuf *
-bridge_input(struct ifnet *ifp, struct ether_header *eh0, struct mbuf *m)
+bridge_input(struct mbuf *m)
 {
+   struct ifnet *ifp;
struct bridge_softc *sc;
struct bridge_iflist *ifl;
+   struct bridge_iflist *srcifl;
struct ether_header *eh;
-#if NVLAN  0
-   uint16_t etype = ntohs(eh0-ether_type);
-#endif /* NVLAN  0 */
+   struct arpcom *ac;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+   struct mbuf *mc;
+   int s;
 
-   /*
-* Make sure this interface is a bridge member.
-*/
-   if (ifp == NULL || ifp-if_bridgeport == NULL || m == NULL)
+   ifp = if_get(m-m_pkthdr.ph_ifidx);
+   KASSERT(ifp != NULL);
+   if (((ifp-if_flags  IFF_UP) == 0) || (ifp-if_bridgeport == NULL))
return (m);
 
if ((m-m_flags  M_PKTHDR) == 0)
panic(bridge_input(): no HDR);
 
-   m-m_flags = ~M_PROTO1;/* Loop prevention */
-
ifl = (struct bridge_iflist *)ifp-if_bridgeport;
sc = ifl-bridge_sc;
if ((sc-sc_if.if_flags  IFF_RUNNING) == 0)
return (m);
 
-   M_PREPEND(m, sizeof(*eh), M_DONTWAIT);
-   if (m == NULL)
-   return (NULL);
-   eh = mtod(m, struct ether_header *);
-   memmove(eh, eh0, sizeof(*eh));
-
 #if NBPFILTER  0
if (sc-sc_if.if_bpf)
bpf_mtap_ether(sc-sc_if.if_bpf, m, BPF_DIRECTION_IN);
@@ -1340,35 +1338,8 @@ bridge_input(struct ifnet *ifp, struct e
 
bridge_span(sc, m);
 
-   m = bridge_dispatch(ifl, ifp, m);
-
-#if NVLAN  0
-   if ((m != NULL)  ((m-m_flags  M_VLANTAG) ||
-   etype == ETHERTYPE_VLAN || etype == ETHERTYPE_QINQ)) {
-   /* The bridge did not want the vlan frame either, drop it. */
-   ifp-if_noproto

bridge(4) local broadcast fix

2015-06-17 Thread Martin Pieuchot
It makes sense to not retransmit a packet on the interface it was
received from but we should still call bridge_localbroadcast() on
this interface, otherwise the network stack never sees this packet.

This fixes the configuration where you have a vlan(4) on top of an
interface in a bridge(4) and the vlan(4) is not in the bridge(4).

ok?

Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.244
diff -u -p -r1.244 if_bridge.c
--- net/if_bridge.c 16 Jun 2015 11:09:39 -  1.244
+++ net/if_bridge.c 17 Jun 2015 12:05:05 -
@@ -1500,12 +1500,9 @@ bridge_broadcast(struct bridge_softc *sc
int len, used = 0;
 
TAILQ_FOREACH(p, sc-sc_iflist, next) {
-   /*
-* Don't retransmit out of the same interface where
-* the packet was received from.
-*/
dst_if = p-ifp;
-   if (dst_if-if_index == ifp-if_index)
+
+   if ((dst_if-if_flags  IFF_RUNNING) == 0)
continue;
 
if ((p-bif_flags  IFBIF_STP) 
@@ -1516,15 +1513,6 @@ bridge_broadcast(struct bridge_softc *sc
(m-m_flags  (M_BCAST | M_MCAST)) == 0)
continue;
 
-   if ((dst_if-if_flags  IFF_RUNNING) == 0)
-   continue;
-
-   if (IF_QFULL(dst_if-if_snd)) {
-   IF_DROP(dst_if-if_snd);
-   sc-sc_if.if_oerrors++;
-   continue;
-   }
-
/* Drop non-IP frames if the appropriate flag is set. */
if (p-bif_flags  IFBIF_BLOCKNONIP 
bridge_blocknonip(eh, m))
@@ -1534,6 +1522,19 @@ bridge_broadcast(struct bridge_softc *sc
continue;
 
bridge_localbroadcast(sc, dst_if, eh, m);
+
+   /*
+* Don't retransmit out of the same interface where
+* the packet was received from.
+*/
+   if (dst_if-if_index == ifp-if_index)
+   continue;
+
+   if (IF_QFULL(dst_if-if_snd)) {
+   IF_DROP(dst_if-if_snd);
+   sc-sc_if.if_oerrors++;
+   continue;
+   }
 
/* If last one, reuse the passed-in mbuf */
if (TAILQ_NEXT(p, next) == NULL) {



Re: bridge(4) and if_input()

2015-06-01 Thread Martin Pieuchot
On 28/05/15(Thu) 11:23, Martin Pieuchot wrote:
 Let's use if_input() rather than ether_input_mbuf().  This simplifies
 the bpf(4) bits and removes some duplicate rcvif assignments.
 
 Once ether_input_mbuf() is no more my plan is to start merging various
 ifp counters and put an interface index in the packet header.
 
 ok?

Anybody?

 
 Index: net/if_bridge.c
 ===
 RCS file: /cvs/src/sys/net/if_bridge.c,v
 retrieving revision 1.239
 diff -u -p -r1.239 if_bridge.c
 --- net/if_bridge.c   18 May 2015 11:43:57 -  1.239
 +++ net/if_bridge.c   22 May 2015 13:39:16 -
 @@ -1364,6 +1364,7 @@ bridge_dispatch(struct bridge_iflist *if
   struct bridge_iflist *srcifl;
   struct ether_header *eh;
   struct arpcom *ac;
 + struct mbuf_list ml = MBUF_LIST_INITIALIZER();
   struct mbuf *mc;
   int s;
  
 @@ -1411,21 +1412,14 @@ bridge_dispatch(struct bridge_iflist *if
   schednetisr(NETISR_BRIDGE);
   if (ifp-if_type == IFT_GIF) {
   TAILQ_FOREACH(ifl, sc-sc_iflist, next) {
 - if (ifl-ifp-if_type == IFT_ETHER)
 - break;
 - }
 - if (ifl != NULL) {
 - m-m_pkthdr.rcvif = ifl-ifp;
 - m-m_pkthdr.ph_rtableid = ifl-ifp-if_rdomain;
 -#if NBPFILTER  0
 - if (ifl-ifp-if_bpf)
 - bpf_mtap_ether(ifl-ifp-if_bpf, m,
 - BPF_DIRECTION_IN);
 -#endif
 + if (ifl-ifp-if_type != IFT_ETHER)
 + continue;
 +
   m-m_flags |= M_PROTO1;
 - ether_input_mbuf(ifl-ifp, m);
 + ml_enqueue(ml, m);
 + if_input(ifl-ifp, ml);
   ifl-ifp-if_ipackets++;
 - m = NULL;
 + return (NULL);
   }
   }
   return (m);
 @@ -1462,24 +1456,14 @@ bridge_dispatch(struct bridge_iflist *if
   return (NULL);
   }
  
 - /* Make sure the real incoming interface
 -  * is aware */
 -#if NBPFILTER  0
 - if (ifl-ifp-if_bpf)
 - bpf_mtap_ether(ifl-ifp-if_bpf, m,
 - BPF_DIRECTION_IN);
 -#endif
 - /* Count for the interface we are going to */
 - ifl-ifp-if_ipackets++;
 -
   /* Count for the bridge */
   sc-sc_if.if_ipackets++;
   sc-sc_if.if_ibytes += m-m_pkthdr.len;
  
 - m-m_pkthdr.rcvif = ifl-ifp;
 - m-m_pkthdr.ph_rtableid = ifl-ifp-if_rdomain;
   m-m_flags |= M_PROTO1;
 - ether_input_mbuf(ifl-ifp, m);
 + ml_enqueue(ml, m);
 + if_input(ifl-ifp, ml);
 + ifl-ifp-if_ipackets++;
   return (NULL);
   }
   if (bcmp(ac-ac_enaddr, eh-ether_shost, ETHER_ADDR_LEN) == 0
 @@ -1615,8 +1599,10 @@ void
  bridge_localbroadcast(struct bridge_softc *sc, struct ifnet *ifp,
  struct ether_header *eh, struct mbuf *m)
  {
 + struct mbuf_list ml = MBUF_LIST_INITIALIZER();
   struct mbuf *m1;
   u_int16_t etype;
 + int s;
  
   /*
* quick optimisation, don't send packets up the stack if no
 @@ -1638,18 +1624,11 @@ bridge_localbroadcast(struct bridge_soft
   sc-sc_if.if_oerrors++;
   return;
   }
 - /* fixup header a bit */
 - m1-m_pkthdr.rcvif = ifp;
 - m1-m_pkthdr.ph_rtableid = ifp-if_rdomain;
   m1-m_flags |= M_PROTO1;
 -
 -#if NBPFILTER  0
 - if (ifp-if_bpf)
 - bpf_mtap(ifp-if_bpf, m1,
 - BPF_DIRECTION_IN);
 -#endif
 -
 - ether_input_mbuf(ifp, m1);
 + ml_enqueue(ml, m1);
 + s = splnet();
 + if_input(ifp, ml);
 + splx(s);
   ifp-if_ipackets++;
  }
  
 



Re: arpresolve: unresolved and rt_expire == 0

2015-06-01 Thread Martin Pieuchot
On 31/05/15(Sun) 22:05, mxb wrote:
 
 Hello,
 any ideas regarding subject ?

Yes various.  Any idea about how to reproduce it?

What you're seeing is a side-effect of r1.151 of netinet/if_ether.c.

 I see this in ‘dmesg’. 
 
 Also all local (on machine itself) arp entries are incomplete:

Yes because we stopped copying the Ethernet address into the gateway 
field of RTF_LOCAL routes.  But since RTF_LOCAL routes are bound to
lo0 I don't know how your kernel ends up calling arpresolve() and
triggering this code path.



Re: carp(4) is out

2015-05-28 Thread Martin Pieuchot
On 23/05/15(Sat) 19:08, mxb wrote:
 [...]
 so far no problems.

Here's an updated version to match recent changes.  It also includes a
nitpick fix from bluhm@.

Tests and oks welcome.

Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.201
diff -u -p -r1.201 if_ethersubr.c
--- net/if_ethersubr.c  26 May 2015 11:39:07 -  1.201
+++ net/if_ethersubr.c  28 May 2015 08:05:02 -
@@ -490,18 +490,6 @@ ether_input(struct mbuf *m)
}
 #endif
 
-#if NCARP  0
-   if (ifp-if_carp) {
-   if (ifp-if_type != IFT_CARP  (carp_input(ifp, eh, m) == 0))
-   return (1);
-   /* clear mcast if received on a carp IP balanced address */
-   else if (ifp-if_type == IFT_CARP 
-   m-m_flags  (M_BCAST|M_MCAST) 
-   carp_our_mcastaddr(ifp, (u_int8_t *)eh-ether_dhost))
-   m-m_flags = ~(M_BCAST|M_MCAST);
-   }
-#endif /* NCARP  0 */
-
ac = (struct arpcom *)ifp;
 
/*
Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.257
diff -u -p -r1.257 ip_carp.c
--- netinet/ip_carp.c   21 May 2015 09:17:53 -  1.257
+++ netinet/ip_carp.c   28 May 2015 09:06:00 -
@@ -120,6 +120,7 @@ struct carp_softc {
 #definesc_carpdev  sc_ac.ac_if.if_carpdev
void *ah_cookie;
void *lh_cookie;
+   struct ifih *sc_ifih;
struct ip_moptions sc_imo;
 #ifdef INET6
struct ip6_moptions sc_im6o;
@@ -193,6 +194,7 @@ voidcarp_hmac_generate(struct carp_vhos
unsigned char *, u_int8_t);
 intcarp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
unsigned char *);
+intcarp_input(struct mbuf *);
 void   carp_proto_input_c(struct mbuf *, struct carp_header *, int,
sa_family_t);
 void   carpattach(int);
@@ -824,6 +826,7 @@ carp_del_all_timeouts(struct carp_softc 
 void
 carpdetach(struct carp_softc *sc)
 {
+   struct ifnet *ifp;
struct carp_if *cif;
int s;
 
@@ -839,20 +842,29 @@ carpdetach(struct carp_softc *sc)
carp_setrun_all(sc, 0);
carp_multicast_cleanup(sc);
 
-   s = splnet();
if (sc-ah_cookie != NULL)
hook_disestablish(sc-sc_if.if_addrhooks, sc-ah_cookie);
-   if (sc-sc_carpdev != NULL) {
-   if (sc-lh_cookie != NULL)
-   hook_disestablish(sc-sc_carpdev-if_linkstatehooks,
-   sc-lh_cookie);
-   cif = (struct carp_if *)sc-sc_carpdev-if_carp;
-   TAILQ_REMOVE(cif-vhif_vrs, sc, sc_list);
-   if (!--cif-vhif_nvrs) {
-   ifpromisc(sc-sc_carpdev, 0);
-   sc-sc_carpdev-if_carp = NULL;
-   free(cif, M_IFADDR, sizeof(*cif));
-   }
+
+   ifp = sc-sc_carpdev;
+   if (ifp == NULL)
+   return;
+
+   s = splnet();
+   /* Restore previous input handler. */
+   if (--sc-sc_ifih-ifih_refcnt == 0) {
+   SLIST_REMOVE(ifp-if_inputs, sc-sc_ifih, ifih, ifih_next);
+   free(sc-sc_ifih, M_DEVBUF, sizeof(*sc-sc_ifih));
+   }
+
+   if (sc-lh_cookie != NULL)
+   hook_disestablish(ifp-if_linkstatehooks,
+   sc-lh_cookie);
+   cif = (struct carp_if *)ifp-if_carp;
+   TAILQ_REMOVE(cif-vhif_vrs, sc, sc_list);
+   if (!--cif-vhif_nvrs) {
+   ifpromisc(ifp, 0);
+   ifp-if_carp = NULL;
+   free(cif, M_IFADDR, sizeof(*cif));
}
sc-sc_carpdev = NULL;
splx(s);
@@ -1403,27 +1415,21 @@ carp_get_srclladdr(struct ifnet *ifp, u_
 }
 
 int
-carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr)
-{
-   struct carp_softc *sc = ifp-if_softc;
-
-   if (sc-sc_balancing != CARP_BAL_IP)
-   return (0);
-
-   return (!memcmp(sc-sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN));
-}
-
-
-int
-carp_input(struct ifnet *ifp0, struct ether_header *eh0, struct mbuf *m)
+carp_input(struct mbuf *m)
 {
+   struct carp_softc *sc;
struct ether_header *eh;
-   struct carp_if *cif = (struct carp_if *)ifp0-if_carp;
-   struct ifnet *ifp;
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+   struct carp_if *cif;
+   struct ifnet *ifp0, *ifp;
 
-   ifp = carp_ourether(cif, eh0-ether_dhost);
-   if (ifp == NULL  (m-m_flags  (M_BCAST|M_MCAST)) == 0)
-   return (1);
+   ifp0 = m-m_pkthdr.rcvif;
+   eh = mtod(m, struct ether_header *);
+   cif = (struct carp_if *)ifp0-if_carp;
+
+   ifp = carp_ourether(cif, eh-ether_dhost);
+   if (ifp == NULL  !ETHER_IS_MULTICAST(eh-ether_dhost))
+   return (0);
 
if (ifp == NULL) {
struct carp_softc *vh;
@@ -1439,41 +1445,33 @@ 

tun(4) and if_input()

2015-05-28 Thread Martin Pieuchot
Replace the last ether_input_mbuf() by if_input().

Index: net/if_tun.c
===
RCS file: /cvs/src/sys/net/if_tun.c,v
retrieving revision 1.144
diff -u -p -r1.144 if_tun.c
--- net/if_tun.c26 May 2015 11:36:26 -  1.144
+++ net/if_tun.c28 May 2015 09:25:52 -
@@ -871,32 +871,34 @@ tunwrite(dev_t dev, struct uio *uio, int
}
 
top-m_pkthdr.len = tlen;
-   top-m_pkthdr.rcvif = ifp;
-
-#if NBPFILTER  0
-   if (ifp-if_bpf) {
-   s = splnet();
-   bpf_mtap(ifp-if_bpf, top, BPF_DIRECTION_IN);
-   splx(s);
-   }
-#endif
 
if (tp-tun_flags  TUN_LAYER2) {
+   struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+
+   ml_enqueue(ml, top);
s = splnet();
-   ether_input_mbuf(ifp, top);
+   if_input(ifp, ml);
splx(s);
-
ifp-if_ipackets++;
 
return (0);
}
 
+#if NBPFILTER  0
+   if (ifp-if_bpf) {
+   s = splnet();
+   bpf_mtap(ifp-if_bpf, top, BPF_DIRECTION_IN);
+   splx(s);
+   }
+#endif
+
th = mtod(top, u_int32_t *);
/* strip the tunnel header */
top-m_data += sizeof(*th);
top-m_len  -= sizeof(*th);
top-m_pkthdr.len -= sizeof(*th);
top-m_pkthdr.ph_rtableid = ifp-if_rdomain;
+   top-m_pkthdr.rcvif = ifp;
 
switch (ntohl(*th)) {
case AF_INET:



<    1   2   3   4   5   6   7   8   9   10   >