Re: pledge bpf + 32bit arch unbreak

2016-07-05 Thread Martin Pelikan
> In many bpf-using programs, bpf is setup before privs are droppped,
> then locked, and then no significant ioctl's are done after that.
> 
> So please show the userland diffs that use this.

You're right.  I was thinking of arp(8) but that code path is write only.
I wrote it for the GSoC dhcpd which keeps a routing socket for interfaces
arriving/departing (plugging USB NICs or adding vlan(4)s into your router
really shouldn't make the dhcpd process die; even deleting interfaces will
keep the rest of the system serving happily).

It probably doesn't have to be there; the privileged part of the code fits
on a screen anyway and only does the bare minimum.

The uint64_t part still stands.


Index: kern/kern_pledge.c
===
RCS file: /cvs/src/sys/kern/kern_pledge.c,v
retrieving revision 1.174
diff -u -p -r1.174 kern_pledge.c
--- kern/kern_pledge.c  3 Jul 2016 04:36:08 -   1.174
+++ kern/kern_pledge.c  5 Jul 2016 17:35:04 -
@@ -79,7 +79,7 @@
 #include "drm.h"
 #endif
 
-int pledgereq_flags(const char *req);
+uint64_t pledgereq_flags(const char *req);
 int canonpath(const char *input, char *buf, size_t bufsize);
 int substrcmp(const char *p1, size_t s1, const char *p2, size_t s2);
 int resolvpath(struct proc *p, char **rdir, size_t *rdirlen, char **cwd,
@@ -404,7 +405,7 @@ sys_pledge(struct proc *p, void *v, regi
if (SCARG(uap, request)) {
size_t rbuflen;
char *rbuf, *rp, *pn;
-   int f;
+   uint64_t f;
 
rbuf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
error = copyinstr(SCARG(uap, request), rbuf, MAXPATHLEN,
@@ -1514,7 +1534,7 @@ pledge_swapctl(struct proc *p)
 }
 
 /* bsearch over pledgereq. return flags value if found, 0 else */
-int
+uint64_t
 pledgereq_flags(const char *req_name)
 {
int base = 0, cmp, i, lim;



pledge bpf + 32bit arch unbreak

2016-07-05 Thread Martin Pelikan
Only the bits necessary to set up a filter and lock down an incoming interface.


Index: kern/kern_pledge.c
===
RCS file: /cvs/src/sys/kern/kern_pledge.c,v
retrieving revision 1.174
diff -u -p -r1.174 kern_pledge.c
--- kern/kern_pledge.c  3 Jul 2016 04:36:08 -   1.174
+++ kern/kern_pledge.c  5 Jul 2016 17:35:04 -
@@ -79,7 +79,7 @@
 #include "drm.h"
 #endif
 
-int pledgereq_flags(const char *req);
+uint64_t pledgereq_flags(const char *req);
 int canonpath(const char *input, char *buf, size_t bufsize);
 int substrcmp(const char *p1, size_t s1, const char *p2, size_t s2);
 int resolvpath(struct proc *p, char **rdir, size_t *rdirlen, char **cwd,
@@ -359,6 +359,7 @@ static const struct {
uint64_t flags;
 } pledgereq[] = {
{ "audio",  PLEDGE_AUDIO },
+   { "bpf",PLEDGE_BPF },
{ "chown",  PLEDGE_CHOWN | PLEDGE_CHOWNUID },
{ "cpath",  PLEDGE_CPATH },
{ "disklabel",  PLEDGE_DISKLABEL },
@@ -404,7 +405,7 @@ sys_pledge(struct proc *p, void *v, regi
if (SCARG(uap, request)) {
size_t rbuflen;
char *rbuf, *rp, *pn;
-   int f;
+   uint64_t f;
 
rbuf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
error = copyinstr(SCARG(uap, request), rbuf, MAXPATHLEN,
@@ -1198,6 +1199,25 @@ pledge_ioctl(struct proc *p, long com, s
 #endif /* NAUDIO > 0 */
}
 
+   if ((p->p_p->ps_pledge & PLEDGE_BPF)) {
+   switch (com) {
+   case BIOCGBLEN:
+   case BIOCVERSION:
+   case BIOCIMMEDIATE:
+   case BIOCSFILDROP:
+   case BIOCSHDRCMPLT:
+   case BIOCSETF:
+   case BIOCSETIF:
+   case BIOCSETWF:
+   case BIOCLOCK:
+   if ((fp->f_type == DTYPE_VNODE) &&
+   (vp->v_type == VCHR) &&
+   (cdevsw[major(vp->v_rdev)].d_open == bpfopen))
+   return (0);
+   break;
+   }
+   }
+
if ((p->p_p->ps_pledge & PLEDGE_DISKLABEL)) {
switch (com) {
case DIOCGDINFO:
@@ -1514,7 +1534,7 @@ pledge_swapctl(struct proc *p)
 }
 
 /* bsearch over pledgereq. return flags value if found, 0 else */
-int
+uint64_t
 pledgereq_flags(const char *req_name)
 {
int base = 0, cmp, i, lim;
Index: sys/pledge.h
===
RCS file: /cvs/src/sys/sys/pledge.h,v
retrieving revision 1.29
diff -u -p -r1.29 pledge.h
--- sys/pledge.h3 Jul 2016 04:36:08 -   1.29
+++ sys/pledge.h5 Jul 2016 17:35:04 -
@@ -58,6 +58,7 @@
 #define PLEDGE_VMM 0x4000ULL   /* vmm ioctls */
 #define PLEDGE_CHOWN   0x8000ULL   /* chown(2) family */
 #define PLEDGE_CHOWNUID0x0001ULL   /* allow owner/group 
changes */
+#define PLEDGE_BPF 0x0002ULL   /* bpf ioctls */
 
 /*
  * Bits outside PLEDGE_USERSET are used by the kernel itself
@@ -103,6 +104,7 @@ static struct {
{ PLEDGE_DRM,   "drm" },
{ PLEDGE_VMM,   "vmm" },
{ PLEDGE_CHOWNUID,  "chown" },
+   { PLEDGE_BPF,   "bpf" },
{ 0, NULL },
 };
 #endif



Re: softraid(4): Linux Unified Key Setup

2015-05-25 Thread Martin Pelikan
 But why? Sharing encrypted disks between systems is probably a mistake.

My scenarios are multiboot desktop/laptop and external flash/hard drives.
Do you store your backup unencrypted?  What if you're away and someone else
[here: a Linux user] has been given a task of performing restores?

This format may not be the best, but at least is supported elsewhere.



softraid(4): Linux Unified Key Setup

2015-05-24 Thread Martin Pelikan
Index: conf/files
===
RCS file: /cvs/src/sys/conf/files,v
retrieving revision 1.592
diff -u -p -r1.592 files
--- conf/files  11 May 2015 06:46:21 -  1.592
+++ conf/files  24 May 2015 12:29:00 -
@@ -517,6 +517,8 @@ attach  softraid at root
 file   dev/softraid.c  softraidneeds-flag
 file   dev/softraid_concat.c   softraid
 file   dev/softraid_crypto.c   softraid  crypto
+file   dev/softraid_luks.c softraid  crypto
+file   dev/luks.c  softraid  crypto
 file   dev/softraid_raid0.csoftraid
 file   dev/softraid_raid1.csoftraid
 file   dev/softraid_raid5.csoftraid
@@ -686,7 +688,7 @@ file kern/kern_synch.c
 file kern/kern_tc.c
 file kern/kern_time.c
 file kern/kern_timeout.c
-file kern/kern_uuid.c  gpt
+file kern/kern_uuid.c  gpt | (softraid  crypto)
 file kern/kern_watchdog.c  !small_kernel
 file kern/kern_task.c
 file kern/kern_xxx.c
Index: crypto/xform.c
===
RCS file: /cvs/src/sys/crypto/xform.c,v
retrieving revision 1.46
diff -u -p -r1.46 xform.c
--- crypto/xform.c  14 Mar 2015 03:38:46 -  1.46
+++ crypto/xform.c  24 May 2015 12:02:30 -
@@ -18,6 +18,8 @@
  *
  * AES XTS implementation in 2008 by Damien Miller
  *
+ * PBKDF2: Copyright (c) 2008 Damien Bergamini damien.bergam...@free.fr
+ *
  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
  * Angelos D. Keromytis and Niels Provos.
  *
@@ -46,6 +48,8 @@
 #include sys/errno.h
 #include sys/time.h
 #include sys/kernel.h
+#include sys/malloc.h
+#include sys/stdint.h
 #include machine/cpu.h
 
 #include crypto/md5.h
@@ -58,6 +62,7 @@
 #include crypto/cryptodev.h
 #include crypto/xform.h
 #include crypto/gmac.h
+#include crypto/hmac.h
 
 extern void des_ecb3_encrypt(caddr_t, caddr_t, caddr_t, caddr_t, caddr_t, int);
 extern void des_ecb_encrypt(caddr_t, caddr_t, caddr_t, int);
@@ -675,4 +680,64 @@ lzs_dummy(u_int8_t *data, u_int32_t size
 {
*out = NULL;
return (0);
+}
+
+/*
+ * Password-Based Key Derivation Function 2 (PKCS #5 v2.0).
+ * Code based on IEEE Std 802.11-2007, Annex H.4.2.
+ */
+int
+pkcs5_pbkdf2(const char *pass, size_t pass_len, const char *salt,
+size_t salt_len, u_int8_t *key, size_t key_len, u_int rounds)
+{
+   HMAC_SHA1_CTX ctx;
+   u_int8_t *asalt, obuf[SHA1_DIGEST_LENGTH];
+   u_int8_t d1[SHA1_DIGEST_LENGTH], d2[SHA1_DIGEST_LENGTH];
+   u_int i, j;
+   u_int count;
+   size_t r;
+
+   if (rounds  1 || key_len == 0)
+   return -1;
+   if (salt_len == 0 || salt_len  SIZE_MAX - 1)
+   return -1;
+   asalt = malloc(salt_len + 4, M_TEMP, M_NOWAIT);
+   if (asalt == NULL)
+   return -1;
+
+   memcpy(asalt, salt, salt_len);
+
+   for (count = 1; key_len  0; count++) {
+   asalt[salt_len + 0] = (count  24)  0xff;
+   asalt[salt_len + 1] = (count  16)  0xff;
+   asalt[salt_len + 2] = (count  8)  0xff;
+   asalt[salt_len + 3] = count  0xff;
+   HMAC_SHA1_Init(ctx, pass, pass_len);
+   HMAC_SHA1_Update(ctx, asalt, salt_len + 4);
+   HMAC_SHA1_Final(d1, ctx);
+   memcpy(obuf, d1, sizeof(obuf));
+
+   for (i = 1; i  rounds; i++) {
+   HMAC_SHA1_Init(ctx, pass, pass_len);
+   HMAC_SHA1_Update(ctx, d1, sizeof d1);
+   HMAC_SHA1_Final(d2, ctx);
+
+   memcpy(d1, d2, sizeof(d1));
+   for (j = 0; j  sizeof(obuf); j++)
+   obuf[j] ^= d1[j];
+   }
+
+   r = MIN(key_len, SHA1_DIGEST_LENGTH);
+   memcpy(key, obuf, r);
+   key += r;
+   key_len -= r;
+   };
+   explicit_bzero(asalt, salt_len + 4);
+   explicit_bzero(d1, sizeof(d1));
+   explicit_bzero(d2, sizeof(d2));
+   explicit_bzero(obuf, sizeof(obuf));
+   explicit_bzero(ctx, sizeof ctx);
+   free(asalt, M_TEMP, salt_len + 4);
+
+   return 0;
 }
Index: dev/luks.c
===
RCS file: dev/luks.c
diff -N dev/luks.c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ dev/luks.c  24 May 2015 12:02:30 -
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2015 Martin Pelikan peli...@openbsd.org
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR

bioctl(8): Linux Unified Key Setup

2015-05-24 Thread Martin Pelikan
Index: bioctl.c
===
RCS file: /cvs/src/sbin/bioctl/bioctl.c,v
retrieving revision 1.126
diff -u -p -r1.126 bioctl.c
--- bioctl.c11 May 2015 12:14:22 -  1.126
+++ bioctl.c24 May 2015 12:39:27 -
@@ -31,6 +31,7 @@
 #include sys/ioctl.h
 #include sys/dkio.h
 #include sys/stat.h
+#include dev/softraid_luks.h
 #include dev/softraidvar.h
 #include dev/biovar.h
 
@@ -60,6 +61,8 @@ int   bio_parse_devlist(char *, dev_t *)
 void   bio_kdf_derive(struct sr_crypto_kdfinfo *,
struct sr_crypto_kdf_pbkdf2 *, char *, int);
 void   bio_kdf_generate(struct sr_crypto_kdfinfo *);
+void   luks_kdf_derive(u_int8_t **, size_t *,
+   struct luks_user_pbkdf2 *);
 void   derive_key_pkcs(int, u_int8_t *, size_t, u_int8_t *,
size_t, char *, int);
 
@@ -765,6 +768,8 @@ bio_createraid(u_int16_t level, char *de
struct sr_crypto_kdfinfo kdfinfo;
struct sr_crypto_kdf_pbkdf2 kdfhint;
struct stat sb;
+   u_int8_t*response = NULL;
+   size_t  resplen;
int rv, no_dev, fd;
dev_t   *dt;
u_int16_t   min_disks = 0;
@@ -788,6 +793,7 @@ bio_createraid(u_int16_t level, char *de
case 5:
min_disks = 3;
break;
+   case 'L':
case 'C':
min_disks = 1;
break;
@@ -813,7 +819,36 @@ bio_createraid(u_int16_t level, char *de
create.bc_flags = BIOC_SCDEVT | cflags;
create.bc_key_disk = NODEV;
 
-   if (level == 'C'  key_disk == NULL) {
+   if (level == 'L') {
+
+   struct luks_user_pbkdf2  hint;
+
+   memset(hint, 0, sizeof hint);
+
+   create.bc_flags |= BIOC_SCNOAUTOASSEMBLE;
+
+   create.bc_opaque = hint;
+   create.bc_opaque_size = sizeof hint;
+   create.bc_opaque_flags = BIOC_SOOUT;
+
+   if (ioctl(devh, BIOCCREATERAID, create))
+   err(1, ioctl);
+
+   bio_status(create.bc_bio.bio_status);
+
+   if (create.bc_opaque_status == BIOC_SOINOUT_OK) {
+   luks_kdf_derive(response, resplen, hint);
+   memset(hint, 0, sizeof hint);
+   } else {
+   errx(1, creating LUKS not supported yet);
+   }
+   
+   create.bc_opaque = response;
+   create.bc_opaque_size = resplen;
+   create.bc_opaque_flags = BIOC_SOIN;
+
+   }
+   else if (level == 'C'  key_disk == NULL) {
 
memset(kdfinfo, 0, sizeof(kdfinfo));
memset(kdfhint, 0, sizeof(kdfhint));
@@ -870,6 +905,10 @@ bio_createraid(u_int16_t level, char *de
 
rv = ioctl(devh, BIOCCREATERAID, create);
explicit_bzero(kdfinfo, sizeof(kdfinfo));
+   if (response != NULL) {
+   explicit_bzero(response, resplen);
+   free(response);
+   }
if (rv == -1)
err(1, BIOCCREATERAID);
 
@@ -921,6 +960,47 @@ bio_kdf_generate(struct sr_crypto_kdfinf
kdfinfo-maskkey, sizeof(kdfinfo-maskkey),
kdfinfo-pbkdf2.salt, sizeof(kdfinfo-pbkdf2.salt),
New passphrase: , 1);
+}
+
+void
+luks_kdf_derive(u_int8_t **respp, size_t *resplenp, struct luks_user_pbkdf2 *h)
+{
+   char passphrase[1024];
+   u_int8_t *ret;
+   int i; 
+
+   if (readpassphrase(LUKS Passphrase: , passphrase, sizeof(passphrase),
+   rpp_flag) == NULL)
+   errx(1, unable to read passphrase);
+
+   /* This will be leaked, but this is a one-shot program anyway. */
+   ret = reallocarray(NULL, h-key_bytes, LUKS_NUM_KEYS);
+   if (ret == NULL) {
+   explicit_bzero(passphrase, sizeof(passphrase));
+   errx(1, out of memory, need %u\n, h-key_bytes);
+   }
+
+   for (i = 0; i  LUKS_NUM_KEYS; ++i) {
+   u_int8_t *digest = ret + (i * h-key_bytes);
+
+   if ((h-have_keys  (1  i)) == 0)
+   continue;
+
+   /* derive key from passphrase */
+   if (pkcs5_pbkdf2(passphrase, strlen(passphrase),
+   h-keys[i].salt, LUKS_SALT_LEN,
+   digest, h-key_bytes, h-keys[i].iterations) != 0)
+   goto fail;
+   }
+   explicit_bzero(passphrase, sizeof(passphrase));
+
+   *respp = ret;
+   *resplenp = h-key_bytes * LUKS_NUM_KEYS;
+   return;
+ fail:
+   explicit_bzero(passphrase, sizeof(passphrase));
+   explicit_bzero(ret, h-key_bytes * LUKS_NUM_KEYS);
+   errx(1, pbkdf2 failed);
 }
 
 int



ypldap - support for ypcat(1)ing the whole database

2015-02-11 Thread Martin Pelikan
Index: yp.c
===
RCS file: /cvs/src/usr.sbin/ypldap/yp.c,v
retrieving revision 1.14
diff -u -p -r1.14 yp.c
--- yp.c11 Feb 2015 01:26:00 -  1.14
+++ yp.c12 Feb 2015 02:21:09 -
@@ -51,6 +51,7 @@ int   yp_check(struct svc_req *);
 intyp_valid_domain(char *, struct ypresp_val *);
 void   yp_make_val(struct ypresp_val *, char *, int);
 void   yp_make_keyval(struct ypresp_key_val *, char *, char *);
+bool_t ypdb_xdr_get_all(XDR *xdrs, ypreq_nokey *req);
 
 static struct env  *env;
 
@@ -218,6 +219,8 @@ yp_dispatch(struct svc_req *req, SVCXPRT
return;
case YPPROC_ALL:
log_debug(ypproc_all);
+   xdr_argument = (xdrproc_t) xdr_ypreq_nokey;
+   xdr_result = (xdrproc_t) ypdb_xdr_get_all;
if (yp_check(req) == -1)
return;
cb = (void *)ypproc_all_2_svc;
@@ -548,13 +551,175 @@ ypproc_next_2_svc(ypreq_key *arg, struct
 ypresp_all *
 ypproc_all_2_svc(ypreq_nokey *arg, struct svc_req *req)
 {
-   static struct ypresp_allres;
+   struct ypresp_val fake;
 
-   if (yp_valid_domain(arg-domain, (struct ypresp_val *)res) == -1)
-   return (res);
+   if (yp_valid_domain(arg-domain, fake) == -1)
+   return (NULL);
+   if (strcmp(arg-map, passwd.byname) 
+   strcmp(arg-map, passwd.byuid) 
+   strcmp(arg-map, group.byname) 
+   strcmp(arg-map, group.bygid) 
+   strcmp(arg-map, netid.byname)) {
+   return (NULL);
+   }
+   return ((void *) arg);
+}
 
-   svcerr_auth(req-rq_xprt, AUTH_FAILED);
-   return (NULL);
+static bool_t
+ypdb_xdr_get_all_passwd(XDR *xdrs, struct ypresp_all *resp, int byname)
+{
+   static char key[YPMAXRECORD + 1];
+   struct userent *ue;
+
+   RB_FOREACH(ue, user_name_tree, env-sc_user_names) {
+   size_t len, keylen = strlen(ue-ue_line);
+
+   resp-more = TRUE;
+   resp-ypresp_all_u.val.stat = TRUE;
+
+   if (byname) {
+   strlcpy(key, ue-ue_line, sizeof key);
+   resp-ypresp_all_u.val.key.keydat_val = key;
+   resp-ypresp_all_u.val.key.keydat_len = keylen;
+   }
+   else {
+   int kl = snprintf(key, sizeof key, %u, ue-ue_uid);
+
+   if (kl  0) {
+   log_warnx(%s: key too long, __func__);
+   return FALSE;
+   }
+   resp-ypresp_all_u.val.key.keydat_val = key;
+   resp-ypresp_all_u.val.key.keydat_len = kl;
+   }
+
+   ue-ue_line[keylen] = ':';
+   len = strnlen(ue-ue_line, YPMAXRECORD);
+   resp-ypresp_all_u.val.val.valdat_val = ue-ue_line;
+   resp-ypresp_all_u.val.val.valdat_len = len;
+
+   if (!xdr_ypresp_all(xdrs, resp)) {
+   log_warn(%s: %s (%zu bytes), __func__, key, len);
+   return FALSE;
+   }
+
+   ue-ue_line[keylen] = '\0';
+   }
+   return TRUE;
+}
+
+static bool_t
+ypdb_xdr_get_all_group(XDR *xdrs, struct ypresp_all *resp, int byname)
+{
+   static char key[YPMAXRECORD + 1];
+   struct groupent *ge;
+
+   RB_FOREACH(ge, group_name_tree, env-sc_group_names) {
+   size_t len, keylen = strlen(ge-ge_line);
+
+   resp-more = TRUE;
+   resp-ypresp_all_u.val.stat = TRUE;
+   if (byname) {
+   strlcpy(key, ge-ge_line, sizeof key);
+   resp-ypresp_all_u.val.key.keydat_val = key;
+   resp-ypresp_all_u.val.key.keydat_len = keylen;
+   }
+   else {
+   int kl = snprintf(key, sizeof key, %u, ge-ge_gid);
+
+   if (kl  0) {
+   log_warnx(%s: key too long, __func__);
+   return FALSE;
+   }
+   resp-ypresp_all_u.val.key.keydat_val = key;
+   resp-ypresp_all_u.val.key.keydat_len = kl;
+   }
+
+   ge-ge_line[keylen] = ':';
+   len = strnlen(ge-ge_line, YPMAXRECORD);
+   resp-ypresp_all_u.val.val.valdat_val = ge-ge_line;
+   resp-ypresp_all_u.val.val.valdat_len = len;
+
+   if (!xdr_ypresp_all(xdrs, resp)) {
+   log_warn(%s: %s (%zu bytes), __func__, key, len);
+   return FALSE;
+   }
+
+   ge-ge_line[keylen] = '\0';
+   }
+   return TRUE;
+}
+
+static bool_t
+ypdb_xdr_get_all_netid_byname(XDR *xdrs, struct ypresp_all *resp)
+{
+   static char key[YPMAXRECORD + 1];
+   struct userent *ue;
+
+   

Re: HFSC queue pusher

2013-10-28 Thread Martin Pelikan
 As far as I know, HZ is all we have in a portable fashion.  How do you
 get better?
 
 (I have an answer.  I've prodded for years.  Noone has bit yet.  Maybe
 you will).

Is the answer start writing drivers for other timers and make them run
softclock independently of the scheduler tick?  Do we want tickless
operation in the future?

I don't know enough about our supported platforms to see whether any of
them could actually benefit from it, other than amd64 and i386.
The local APIC timer on amd64 is likely the first candidate.  Scheduler
tick would then become yet another timeout.  I don't see if it is that
easy and what else could it possibly break.

This is why the diff doesn't bother with any of that.  It just
simplifies the current situation and shows places to improve later on.

ok? :-)



HFSC queue pusher

2013-10-27 Thread Martin Pelikan
Hi,

if you noticed weak newqueue performance, it was because one component
of it was missing.  After a discussion with claudio I made this diff,
which makes a timeout per HFSC-enabled interface and pushes the data
every hardclock tick, similarly to what oldtbr_timeout() used to do.

This way, in the future, we can:
 - Replace splnet() with mtx_enter(ifp-lock) directly (here).
 - Replace the current coarse 1/HZ timing with a proper high resolution
   one.  This should be done directly in hfsc_dequeue by setting the
   exact value (or maybe slightly more, to accumulate more dequeues into
   one if_start call) instead of just 1 tick all the time.

   The timing details will probably become clearer when we switch from
   TAILQs to RB trees and stop spending huge amounts of time going back
   and forth over all of the active classes.

This diff applies to our current tree, to make it work with unlimited v3
diff, you obviously need to substitute some -'s for .'s.

comments? ok?
--
Martin Pelikan


Index: hfsc.c
===
RCS file: /cvs/src/sys/net/hfsc.c,v
retrieving revision 1.1
diff -u -p -r1.1 hfsc.c
--- hfsc.c  12 Oct 2013 11:39:17 -  1.1
+++ hfsc.c  27 Oct 2013 22:53:50 -
@@ -74,6 +74,7 @@ inthfsc_addq(struct hfsc_class *, str
 struct mbuf*hfsc_getq(struct hfsc_class *);
 struct mbuf*hfsc_pollq(struct hfsc_class *);
 voidhfsc_purgeq(struct hfsc_class *);
+voidhfsc_deferred(void *);
 voidhfsc_update_cfmin(struct hfsc_class *);
 voidhfsc_set_active(struct hfsc_class *, int);
 voidhfsc_set_passive(struct hfsc_class *);
@@ -140,6 +141,9 @@ hfsc_attach(struct ifnet *ifp)
hif-hif_eligible = hfsc_ellist_alloc();
hif-hif_ifq = (struct ifqueue *)ifp-if_snd; /* XXX cast temp */
ifp-if_snd.ifq_hfsc = hif;
+   timeout_set(hif-hif_defer, hfsc_deferred, ifp);
+   /* XXX HRTIMER don't schedule it yet, only when some packets wait. */
+   timeout_add(hif-hif_defer, 1);
 
return (0);
 }
@@ -147,6 +151,7 @@ hfsc_attach(struct ifnet *ifp)
 int
 hfsc_detach(struct ifnet *ifp)
 {
+   timeout_del(ifp-if_snd.ifq_hfsc-hif_defer);
free(ifp-if_snd.ifq_hfsc, M_DEVBUF);
ifp-if_snd.ifq_hfsc = NULL;
 
@@ -557,6 +562,7 @@ hfsc_dequeue(struct ifqueue *ifq, int re
 
cl = tcl;
}
+   /* XXX HRTIMER plan hfsc_deferred precisely here. */
if (cl == NULL)
return (NULL);
}
@@ -601,6 +607,21 @@ hfsc_dequeue(struct ifqueue *ifq, int re
}
 
return (m);
+}
+
+void
+hfsc_deferred(void *arg)
+{
+   struct ifnet *ifp = arg;
+   int s;
+
+   s = splnet();
+   if (HFSC_ENABLED(ifp-if_snd)  !IFQ_IS_EMPTY(ifp-if_snd))
+   if_start(ifp);
+   splx(s);
+
+   /* XXX HRTIMER nearest virtual/fit time is likely less than 1/HZ. */
+   timeout_add(ifp-if_snd.ifq_hfsc-hif_defer, 1);
 }
 
 int
Index: hfsc.h
===
RCS file: /cvs/src/sys/net/hfsc.h,v
retrieving revision 1.1
diff -u -p -r1.1 hfsc.h
--- hfsc.h  12 Oct 2013 11:39:17 -  1.1
+++ hfsc.h  27 Oct 2013 22:53:50 -
@@ -33,6 +33,8 @@
 #ifndef _HFSC_H_
 #define_HFSC_H_
 
+#include sys/timeout.h
+
 /* hfsc class flags */
 #defineHFSC_RED0x0001  /* use RED */
 #defineHFSC_ECN0x0002  /* use RED/ECN */
@@ -242,6 +244,7 @@ struct hfsc_if {
u_int   hif_classid;/* class id sequence number */
 
hfsc_ellist_t *hif_eligible;/* eligible list */
+   struct timeout hif_defer;   /* for queues that weren't ready */
 };
 
 #define HFSC_CLK_SHIFT 8



Re: partial xlocale(3) port from FreeBSD

2013-10-21 Thread Martin Pelikan
 Indeed, doing collation properly (i.e. with Unicode, not just 8 bit
 characters like FreeBSD does) really is a non-trivial effort.
 
 It requires some expertise in linguistics and a solid understanding
 of the unicode standard. You'd need to make use of something like ICU
 (icu-project.org) to keep your sanity, or implement a whole lot of
 that code base yourself...

Unfortunately, that requires support in the 3rd party software itself.


 Applications don't care where a symbol comes from.
 Build scripts and Makefiles might expect them to be in libc and
 would need to link an additional library, but that's trivial to do.

For all such ports?  Ok then :-)

 I think you should tackle your goals (C++11, collation) in isolation.
 They aren't coupled, really.

Yes, but xlocale seemed like a thing they have in common.

 If we bother with collation I think we should try to do better.

Yes, but that I think is only applying a correct library.  As you've
said earlier, getting this right is really hard.

 I would suggest to implement a small and non-OS-specific stub for
 libcxx that they can use on any OS lacking xlocale support.
 Replace/enhance the existing shim for Solaris as part of this effort.
 Work with the libcxx team to integrate your changes there.
 
 If they tell you that they won't run on a non-xlocale OS that isn't
 Solaris, implement the shim anyway and add it to our ports tree.
 
 The shim is going to be a lot less work, and doesn't preclude an
 implentation inside libc at a later stage.

Thanks for showing the right direction.  I'll look into it as soon as
I have more time; at least I know what is needed and how big is it.
--
Martin Pelikan



unlimited HFSC v3: more readable, less hacks

2013-10-20 Thread Martin Pelikan
Hopefully the third time does the charm.

The previous union approach to altq/newq bits was wrong, because
switching back and forth was racy.  This new diff then concatenates
these structures like [ifqueue, hfsc_if, altq-bits], has some better
names, doesn't need renaming stuff in the old code (it can remain 2
years old), removes the now useless ifq_hfsc pointer.

As always, it's being heavily tested.
--
Martin Pelikan


? net/hfsc.c.instrumented
Index: altq/if_altq.h
===
RCS file: /cvs/src/sys/altq/if_altq.h,v
retrieving revision 1.16
diff -u -p -r1.16 if_altq.h
--- altq/if_altq.h  12 Oct 2013 12:13:10 -  1.16
+++ altq/if_altq.h  20 Oct 2013 21:25:41 -
@@ -36,6 +36,13 @@ struct altq_pktattr; struct oldtb_regula
 /*
  * Structure defining a queue for a network interface.
  */
+
+/* XXX hack, because we need the structure definition */
+#define ALTQ_IS_ENABLED1
+#include net/hfsc.h
+#undef ALTQ_IS_ENABLED
+/* XXX hack */
+
 struct ifaltq {
/* fields compatible with struct ifqueue */
struct {
@@ -45,8 +52,8 @@ structifaltq {
int ifq_len;
int ifq_maxlen;
int ifq_drops;
-   struct  hfsc_if *ifq_hfsc;
struct  timeout *ifq_congestion;
+   struct  hfsc_if  ifq_hfsc;
 
/* alternate queueing related fields */
int altq_type;  /* discipline type */
Index: net/hfsc.c
===
RCS file: /cvs/src/sys/net/hfsc.c,v
retrieving revision 1.1
diff -u -p -r1.1 hfsc.c
--- net/hfsc.c  12 Oct 2013 11:39:17 -  1.1
+++ net/hfsc.c  20 Oct 2013 21:25:42 -
@@ -63,7 +63,7 @@
 /*
  * function prototypes
  */
-struct hfsc_class  *hfsc_class_create(struct hfsc_if *,
+struct hfsc_class  *hfsc_class_create(struct ifqueue *,
struct hfsc_sc *, struct hfsc_sc *,
struct hfsc_sc *, struct hfsc_class *, int,
int, int);
@@ -128,18 +128,49 @@ hfsc_microuptime(void)
HFSC_CLK_SHIFT);
 }
 
+/*
+ * The new table will be exactly one page larger, so in the most
+ * common case of 8B pointers and 4KB pages it's 512 more classes.
+ * Returns the amount of classes, so all new pages are 100% utilized.
+ */
+static inline u_int
+hfsc_more_slots(u_int current)
+{
+   u_int was_pages = current * sizeof(void *) / PAGE_SIZE;
+   u_int n = ((was_pages + 1) * PAGE_SIZE) / sizeof(void *);
+
+   return (n);
+}
+
+static void
+hfsc_grow_class_tbl(struct hfsc_if *hif)
+{
+   struct hfsc_class **newtbl, **old = hif-hif_class_tbl;
+   const u_int slots = hfsc_more_slots(hif-hif_allocated);
+
+   newtbl = malloc(slots * sizeof(void *), M_DEVBUF, M_WAITOK | M_ZERO);
+   memcpy(newtbl, old, hif-hif_allocated * sizeof(void *));
+
+   hif-hif_allocated = slots;
+   hif-hif_class_tbl = newtbl;
+
+   free(old, M_DEVBUF);
+}
+
 int
 hfsc_attach(struct ifnet *ifp)
 {
-   struct hfsc_if *hif;
+   const u_int slots = hfsc_more_slots(0);
+   const size_t sz = slots * sizeof(void *);
+   struct hfsc_if *hif = ifp-if_snd.ifq_hfsc;
 
-   if (ifp-if_snd.ifq_hfsc != NULL)
+   if (hif-hif_class_tbl != NULL)
return (0);
 
-   hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK|M_ZERO);
+   hif-hif_class_tbl = malloc(sz, M_DEVBUF, M_WAITOK | M_ZERO);
+   hif-hif_allocated = slots;
+   hif-hif_classes = 0;
hif-hif_eligible = hfsc_ellist_alloc();
-   hif-hif_ifq = (struct ifqueue *)ifp-if_snd; /* XXX cast temp */
-   ifp-if_snd.ifq_hfsc = hif;
 
return (0);
 }
@@ -147,8 +178,11 @@ hfsc_attach(struct ifnet *ifp)
 int
 hfsc_detach(struct ifnet *ifp)
 {
-   free(ifp-if_snd.ifq_hfsc, M_DEVBUF);
-   ifp-if_snd.ifq_hfsc = NULL;
+   struct hfsc_if *hif = ifp-if_snd.ifq_hfsc;
+
+   hif-hif_allocated = hif-hif_classes = 0;
+   free(hif-hif_class_tbl, M_DEVBUF);
+   hif-hif_class_tbl = NULL;
 
return (0);
 }
@@ -156,11 +190,13 @@ hfsc_detach(struct ifnet *ifp)
 int
 hfsc_addqueue(struct pf_queuespec *q)
 {
-   struct hfsc_if *hif;
+   /* XXX remove the cast when ifaltq is gone. */
+   struct ifqueue *ifq = (struct ifqueue *)q-kif-pfik_ifp-if_snd;
+   struct hfsc_if *hif = ifq-ifq_hfsc;
struct hfsc_class *cl, *parent;
struct hfsc_sc rtsc, lssc, ulsc;
 
-   if ((hif = q-kif-pfik_ifp-if_snd.ifq_hfsc) == NULL)
+   if (hif-hif_allocated == 0)
return (EINVAL);
 
if (q-parent_qid == HFSC_NULLCLASS_HANDLE 
@@ -185,7 +221,7 @@ hfsc_addqueue(struct pf_queuespec *q)
ulsc.d  = q-upperlimit.d;
ulsc.m2 = q-upperlimit.m2.absolute;
 
-   cl = hfsc_class_create(hif, rtsc, lssc, ulsc,
+   cl = hfsc_class_create(ifq, rtsc, lssc, ulsc,
parent, q-qlimit, q-flags, q-qid);
if (cl == NULL

Re: partial xlocale(3) port from FreeBSD

2013-10-20 Thread Martin Pelikan
   Obviously, our locale support still sucks, this patch is mostly 
   providing the API for filling the blanks later.
 
 Which blanks exactly? Locale features we don't have, such as collation?

Yes.  The features why for example PostgreSQL won't sort tables
correctly, which if you live in a country with weird characters in their
language is... quite unfortunate.

I was planning on bringing specifically LC_COLLATE support for a long
time, but it's quite a lot of work. (and testing, and bugfixing with
languages I don't even know existed)

  How much did the ramdisks grow by when you built release with this?  
  Having just freed up a bunch of space on the ramdisks, I'll be pissed if 
  we squander it all immediately.

No objections against #ifndef SMALL_KERNEL-ing the big bits.

 I'm not very excited about xlocale. If the only goal here is an API
 shim to compile a C++ library, can't we put the shim somewhere else
 than libc? Like the misc/libutf8 port we used to have?

Thought about it too, but since apps expect to find this stuff in libc,
I went for a libc diff hoping that porters will have their lives easier.
The functions I ported were the ones ld-2.17 complained about.  I have
no idea whether that port is complete and I don't claim the diff to be
ready.  It gets the job done at the cost of being huge and probably
wrong in places, and is open for discussion.

I don't care about xlocale either.  What'd I like is to have C++11
working out of the box for the next release (Is that real?) and
hopefully collation support some time in the future.  Later in the
process I noticed there is an even smaller shim intended for Solaris as
a part of libcxx, but my thoughts were:
 - Locale has always been a pain in the ass, but something users demand.
   (or is it just me with postgresql?)
 - Sharing this stuff with FreeBSD will make our lives easier should
   anything go wrong with it.  Less work for us + satisfied customers.
 - We don't have to be complete, or even advertise it very much.  But
   stuff that is increasingly popular (like C++11) will work out of the
   box.  The ability to use modern toolchains for ports should make the
   latency-savvy desktop users happier.
 - Since a lot of operating systems have now adopted solutions (being it
   xlocale or others), I suspect libcxx maintainers won't be very happy
   about #ifdef __OpenBSD__ remove half of the functionality

Please correct me if the philosophy is wrong.  Or better, suggest other
ways forward :-)
--
Martin Pelikan



unlimited HFSC v2: [part 2]: the actual work

2013-10-19 Thread Martin Pelikan
Hi.

As promised.  Feel free to comm{en,i}t. :-)
Note that I've changed 'ifq-ifq_hfsc' to 'ifq-ifq_hfsc_data;' a lot
of times; this is because some of the code is in the hot path and the
reference is computed at compile time and not loaded from that useless
pointer field (which will be gone after transition).

Compiles  is being tested on amd64.
--
Martin Pelikan


Index: altq/if_altq.h
===
RCS file: /cvs/src/sys/altq/if_altq.h,v
retrieving revision 1.16
diff -u -p -r1.16 if_altq.h
--- altq/if_altq.h  12 Oct 2013 12:13:10 -  1.16
+++ altq/if_altq.h  19 Oct 2013 11:59:04 -
@@ -29,28 +29,21 @@
 #ifndef _ALTQ_IF_ALTQ_H_
 #define_ALTQ_IF_ALTQ_H_
 
-struct altq_pktattr; struct oldtb_regulator; struct hfsc_if;
+struct altq_pktattr; struct oldtb_regulator; struct mbuf;
 
 #define ALTQ_IFQ_NQUEUES   8
 
 /*
- * Structure defining a queue for a network interface.
+ * Until we get rid of ALTQ, we'd better preserve binary compatibility
+ * between ifaltq and ifqueue in if.h.  But ifaltq had all these
+ * appended to it, and we'd like to separate queue table from hfsc_if.
+ * Therefore, this ALTQ stuff will share space with hfsc_if during the
+ * transition and then will go away.
  */
-struct ifaltq {
-   /* fields compatible with struct ifqueue */
-   struct {
-   struct  mbuf *head;
-   struct  mbuf *tail;
-   }   ifq_q[ALTQ_IFQ_NQUEUES];
-   int ifq_len;
-   int ifq_maxlen;
-   int ifq_drops;
-   struct  hfsc_if *ifq_hfsc;
-   struct  timeout *ifq_congestion;
-
+struct ifaltq;
+struct  altq_stuff {
/* alternate queueing related fields */
int altq_type;  /* discipline type */
-   int altq_flags; /* flags (e.g. ready, in-use) */
void*altq_disc; /* for discipline-specific use */
struct  ifnet *altq_ifp;/* back pointer to interface */
 
@@ -66,6 +59,48 @@ struct   ifaltq {
/* token bucket regulator */
struct  oldtb_regulator *altq_tbr;
 };
+
+/*
+ * Structure defining a queue for a network interface.
+ * XXX ALTQ_TRANSITION ifq_hfsc points to the union in the non-altq case.
+ */
+/* XXX hack, because we need the structure definition */
+#define ALTQ_IS_ENABLED1
+#include net/hfsc.h
+#undef ALTQ_IS_ENABLED
+/* XXX hack */
+
+struct ifaltq {
+   /* fields compatible with struct ifqueue */
+   struct {
+   struct  mbuf *head;
+   struct  mbuf *tail;
+   }   ifq_q[ALTQ_IFQ_NQUEUES];
+   int ifq_len;
+   int ifq_maxlen;
+   int ifq_drops;
+   struct  hfsc_if *ifq_hfsc;
+   struct  timeout *ifq_congestion;
+
+   union {
+   struct altq_stuff   altq;
+   struct hfsc_if  hfsc;
+   } transition;
+
+   /* This can't be in the union, because it says if is ALTQ on. */
+   int altq_flags; /* flags (e.g. ready, in-use) */
+};
+#definealtq_type   transition.altq.altq_type
+#definealtq_disc   transition.altq.altq_disc
+#definealtq_ifptransition.altq.altq_ifp
+#definealtq_enqueuetransition.altq.altq_enqueue
+#definealtq_dequeuetransition.altq.altq_dequeue
+#definealtq_requesttransition.altq.altq_request
+#definealtq_clfier transition.altq.altq_clfier
+#definealtq_classify   transition.altq.altq_classify
+#definealtq_tbrtransition.altq.altq_tbr
+#defineifq_hfsc_data   transition.hfsc
+
 
 #ifdef _KERNEL
 
Index: net/hfsc.c
===
RCS file: /cvs/src/sys/net/hfsc.c,v
retrieving revision 1.1
diff -u -p -r1.1 hfsc.c
--- net/hfsc.c  12 Oct 2013 11:39:17 -  1.1
+++ net/hfsc.c  19 Oct 2013 12:20:38 -
@@ -63,7 +63,7 @@
 /*
  * function prototypes
  */
-struct hfsc_class  *hfsc_class_create(struct hfsc_if *,
+struct hfsc_class  *hfsc_class_create(struct ifqueue *,
struct hfsc_sc *, struct hfsc_sc *,
struct hfsc_sc *, struct hfsc_class *, int,
int, int);
@@ -128,18 +128,49 @@ hfsc_microuptime(void)
HFSC_CLK_SHIFT);
 }
 
+/*
+ * The new table will be exactly one page larger, so in the most
+ * common case of 8B pointers and 4KB pages it's 512 more classes.
+ * Returns the amount of classes, so all new pages are 100% utilized.
+ */
+static inline u_int
+hfsc_new_amount(u_int old)
+{
+   u_int was_pages = old * sizeof(void *) / PAGE_SIZE;
+   u_int n = ((was_pages + 1) * PAGE_SIZE) / sizeof(void *);
+
+   return (n);
+}
+
+static void
+hfsc_grow(struct hfsc_if *hif)
+{
+   struct hfsc_class **newtbl, **old = hif-hif_class_tbl;
+   const u_int slots = hfsc_new_amount(hif-hif_allocated);
+
+   newtbl = malloc(slots

unlimited HFSC v2: [part 1] rename pf_altq::altq_disc

2013-10-18 Thread Martin Pelikan
Hi.

After several suggestions from Henning I started working putting parts
of hfsc_if into ifaltq, where normally ALTQ bits would be (altq_flags
will have to stay outside of the union, because that's how you know if
altq has been enabled on that ifnet).  But making that union will mean
altq_disc as transition.altq.altq_disc will clash with pf_altq's
member.  This diff renames pf_altq::altq_disc to altq_disc_state.
(Ah, the good old C namespace problems! ;-))

It builds on amd64 and shouldn't make any difference.

ok?
--
Martin Pelikan


Index: altq/altq_cbq.c
===
RCS file: /cvs/src/sys/altq/altq_cbq.c,v
retrieving revision 1.26
diff -u -p -r1.26 altq_cbq.c
--- altq/altq_cbq.c 3 Jul 2011 23:59:43 -   1.26
+++ altq/altq_cbq.c 18 Oct 2013 16:49:09 -
@@ -189,10 +189,10 @@ cbq_pfattach(struct pf_altq *a)
struct ifnet*ifp;
int  s, error;
 
-   if ((ifp = ifunit(a-ifname)) == NULL || a-altq_disc == NULL)
+   if ((ifp = ifunit(a-ifname)) == NULL || a-altq_disc_state == NULL)
return (EINVAL);
s = splnet();
-   error = altq_attach(ifp-if_snd, ALTQT_CBQ, a-altq_disc,
+   error = altq_attach(ifp-if_snd, ALTQT_CBQ, a-altq_disc_state,
cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL);
splx(s);
return (error);
@@ -216,7 +216,7 @@ cbq_add_altq(struct pf_altq *a)
cbqp-ifnp.ifq_ = ifp-if_snd; /* keep the ifq */
 
/* keep the state in pf_altq */
-   a-altq_disc = cbqp;
+   a-altq_disc_state = cbqp;
 
return (0);
 }
@@ -226,9 +226,9 @@ cbq_remove_altq(struct pf_altq *a)
 {
cbq_state_t *cbqp;
 
-   if ((cbqp = a-altq_disc) == NULL)
+   if ((cbqp = a-altq_disc_state) == NULL)
return (EINVAL);
-   a-altq_disc = NULL;
+   a-altq_disc_state = NULL;
 
cbq_clear_interface(cbqp);
 
@@ -252,7 +252,7 @@ cbq_add_queue(struct pf_altq *a)
struct cbq_opts *opts;
int i;
 
-   if ((cbqp = a-altq_disc) == NULL)
+   if ((cbqp = a-altq_disc_state) == NULL)
return (EINVAL);
if (a-qid == 0)
return (EINVAL);
@@ -360,7 +360,7 @@ cbq_remove_queue(struct pf_altq *a)
cbq_state_t *cbqp;
int i;
 
-   if ((cbqp = a-altq_disc) == NULL)
+   if ((cbqp = a-altq_disc_state) == NULL)
return (EINVAL);
 
if ((cl = clh_to_clp(cbqp, a-qid)) == NULL)
Index: altq/altq_hfsc.c
===
RCS file: /cvs/src/sys/altq/altq_hfsc.c,v
retrieving revision 1.29
diff -u -p -r1.29 altq_hfsc.c
--- altq/altq_hfsc.c18 Sep 2011 20:34:29 -  1.29
+++ altq/altq_hfsc.c18 Oct 2013 16:49:09 -
@@ -134,10 +134,10 @@ hfsc_pfattach(struct pf_altq *a)
struct ifnet *ifp;
int s, error;
 
-   if ((ifp = ifunit(a-ifname)) == NULL || a-altq_disc == NULL)
+   if ((ifp = ifunit(a-ifname)) == NULL || a-altq_disc_state == NULL)
return (EINVAL);
s = splnet();
-   error = altq_attach(ifp-if_snd, ALTQT_HFSC, a-altq_disc,
+   error = altq_attach(ifp-if_snd, ALTQT_HFSC, a-altq_disc_state,
altq_hfsc_enqueue, altq_hfsc_dequeue, altq_hfsc_request, NULL,
NULL);
splx(s);
@@ -162,7 +162,7 @@ hfsc_add_altq(struct pf_altq *a)
hif-hif_ifq = ifp-if_snd;
 
/* keep the state in pf_altq */
-   a-altq_disc = hif;
+   a-altq_disc_state = hif;
 
return (0);
 }
@@ -172,9 +172,9 @@ hfsc_remove_altq(struct pf_altq *a)
 {
struct altq_hfsc_if *hif;
 
-   if ((hif = a-altq_disc) == NULL)
+   if ((hif = a-altq_disc_state) == NULL)
return (EINVAL);
-   a-altq_disc = NULL;
+   a-altq_disc_state = NULL;
 
(void)hfsc_clear_interface(hif);
(void)altq_hfsc_class_destroy(hif-hif_rootclass);
@@ -194,7 +194,7 @@ hfsc_add_queue(struct pf_altq *a)
struct hfsc_opts *opts;
struct service_curve rtsc, lssc, ulsc;
 
-   if ((hif = a-altq_disc) == NULL)
+   if ((hif = a-altq_disc_state) == NULL)
return (EINVAL);
 
opts = a-pq_u.hfsc_opts;
@@ -235,7 +235,7 @@ hfsc_remove_queue(struct pf_altq *a)
struct altq_hfsc_if *hif;
struct altq_hfsc_class *cl;
 
-   if ((hif = a-altq_disc) == NULL)
+   if ((hif = a-altq_disc_state) == NULL)
return (EINVAL);
 
if ((cl = clh_to_clp(hif, a-qid)) == NULL)
Index: altq/altq_priq.c
===
RCS file: /cvs/src/sys/altq/altq_priq.c,v
retrieving revision 1.24
diff -u -p -r1.24 altq_priq.c
--- altq/altq_priq.c3 Jul 2011 23:59:43 -   1.24
+++ altq/altq_priq.c18 Oct 2013 16:49:09 -
@@ -73,10 +73,10 @@ priq_pfattach(struct pf_altq *a)
struct ifnet *ifp;
int s, error

Re: partial xlocale(3) port from FreeBSD

2013-10-15 Thread Martin Pelikan
 - port xlocale(3) from FreeBSD -- this is what this patch does
 
 I did the patch in two parts, separating include/ and lib/libc/, because my

Second part (include/ headers) follows.
--
Martin Pelikan


Index: _ctype.h
===
RCS file: _ctype.h
diff -N _ctype.h
--- /dev/null   1 Jan 1970 00:00:00 -
+++ _ctype.h15 Oct 2013 12:51:52 -
@@ -0,0 +1,189 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *may be used to endorse or promote products derived from this software
+ *without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From @(#)ctype.h8.4 (Berkeley) 1/21/94
+ * From FreeBSD: src/include/ctype.h,v 1.27 2004/06/23 07:11:39 tjr Exp
+ */
+
+#ifndef __CTYPE_H_
+#define__CTYPE_H_
+
+#include sys/cdefs.h
+#include sys/_types.h
+
+extern const _RuneLocale _DefaultRuneLocale;
+extern const _RuneLocale *_CurrentRuneLocale;
+
+#define_CTYPE_A0x0100L /* Alpha */
+#define_CTYPE_C0x0200L /* Control */
+#define_CTYPE_D0x0400L /* Digit */
+#define_CTYPE_G0x0800L /* Graph */
+#define_CTYPE_L0x1000L /* Lower */
+#define_CTYPE_P0x2000L /* Punct */
+#define_CTYPE_S0x4000L /* Space */
+#define_CTYPE_U0x8000L /* Upper */
+#define_CTYPE_X0x0001L /* X digit */
+#define_CTYPE_B0x0002L /* Blank */
+#define_CTYPE_R0x0004L /* Print */
+#define_CTYPE_I0x0008L /* Ideogram */
+#define_CTYPE_T0x0010L /* Special */
+#define_CTYPE_Q0x0020L /* Phonogram */
+#define_CTYPE_SW0  0x2000L /* 0 width character */
+#define_CTYPE_SW1  0x4000L /* 1 width character */
+#define_CTYPE_SW2  0x8000L /* 2 width character */
+#define_CTYPE_SW3  0xc000L /* 3 width character */
+#define_CTYPE_SWM  0xe000L /* Mask for screen 
width data */
+#define_CTYPE_SWS  30  /* Bits to shift to get 
width */
+
+/* See comments in sys/_types.h about __ct_rune_t. */
+__BEGIN_DECLS
+unsigned long  ___runetype(__ct_rune_t) __pure;
+__ct_rune_t___tolower(__ct_rune_t) __pure;
+__ct_rune_t___toupper(__ct_rune_t) __pure;
+__END_DECLS
+
+/*
+ * _EXTERNALIZE_CTYPE_INLINES_ is defined in locale/nomacros.c to tell us
+ * to generate code for extern versions of all our inline functions.
+ */
+#ifdef _EXTERNALIZE_CTYPE_INLINES_
+#define_USE_CTYPE_INLINE_
+#definestatic
+#define__inline
+#endif
+
+extern int __mb_sb_limit;
+
+/*
+ * Use inline functions if we are allowed to and the compiler supports them.
+ */
+#if !defined(_DONT_USE_CTYPE_INLINE_)  \
+(defined(_USE_CTYPE_INLINE_) || defined

Re: partial xlocale(3) port from FreeBSD

2013-10-15 Thread Martin Pelikan
  - port xlocale(3) from FreeBSD -- this is what this patch does
  
 Second part (include/ headers) follows.

Apologies, I left one bit out.  Here it is.
--
Martin Pelikan


Index: locale.h
===
RCS file: /cvs/src/include/locale.h,v
retrieving revision 1.8
diff -u -p -r1.8 locale.h
--- locale.h3 Jul 2011 18:51:01 -   1.8
+++ locale.h15 Oct 2013 19:21:31 -
@@ -54,6 +54,12 @@ struct lconv {
charn_sep_by_space;
charp_sign_posn;
charn_sign_posn;
+   charint_p_cs_precedes;
+   charint_n_cs_precedes;
+   charint_p_sep_by_space;
+   charint_n_sep_by_space;
+   charint_p_sign_posn;
+   charint_n_sign_posn;
 };
 
 #ifndef NULL



Re: help X11 performance: make sigprocmask(2) SY_NOLOCK

2013-06-20 Thread Martin Pelikan
 I think this is only changed on the context of curproc, so there will
 only be one dude modifying it at a time.
 Problem I see is if there is other code which reads p_sigmask twice,
 since it losed the atomicity between two reads, don't know what can
 happen.

This was exactly my point.  Now that we've established aligned int writes
are fine, there's the wrong assumption that two reads from that memory
(even created by the compiler) must have to have the same value.

 On a quick look it doesn't seem to happen.

Looking at the code more closely:

- DIAGNOSTIC panic(postsig action) could be triggered due to this
  (maybe not, I haven't checked where is postsig() called from and if
  the user has a chance to change the sigmask, but I don't see the point
  of that panic there -- what do others think?)
- I completely forgot about compat_linux syscalls.  This new version
  fixes them as well (yes the code is duplicated, do we want to change
  that?) and then there was one useless splhigh().

 Also, is there any situation where reading the old value is not ok ?
 this would only happen if another cpu tries to check p_sigmask of
 something else than curproc.

It seems there isn't.
--
Martin Pelikan


Index: compat/linux/linux_signal.c
===
RCS file: /cvs/src/sys/compat/linux/linux_signal.c,v
retrieving revision 1.15
diff -u -p -r1.15 linux_signal.c
--- compat/linux/linux_signal.c 19 Jun 2012 11:35:29 -  1.15
+++ compat/linux/linux_signal.c 20 Jun 2013 10:12:40 -
@@ -584,7 +584,6 @@ linux_sys_sigprocmask(p, v, retval)
linux_old_sigset_t ss;
sigset_t bs;
int error = 0;
-   int s;
 
*retval = 0;
 
@@ -604,19 +603,18 @@ linux_sys_sigprocmask(p, v, retval)
 
linux_old_to_bsd_sigset(ss, bs);
 
-   s = splhigh();
-
+   bs = ~sigcantmask;
switch (SCARG(uap, how)) {
case LINUX_SIG_BLOCK:
-   p-p_sigmask |= bs  ~sigcantmask;
+   atomic_setbits_int(p-p_sigmask, bs);
break;
 
case LINUX_SIG_UNBLOCK:
-   p-p_sigmask = ~bs;
+   atomic_clearbits_int(p-p_sigmask, bs);
break;
 
case LINUX_SIG_SETMASK:
-   p-p_sigmask = bs  ~sigcantmask;
+   p-p_sigmask = bs;
break;
 
default:
@@ -624,8 +622,6 @@ linux_sys_sigprocmask(p, v, retval)
break;
}
 
-   splx(s);
-
return (error);
 }
 
@@ -644,7 +640,6 @@ linux_sys_rt_sigprocmask(p, v, retval)
linux_sigset_t ls;
sigset_t bs;
int error = 0;
-   int s;
 
if (SCARG(uap, sigsetsize) != sizeof(linux_sigset_t))
return (EINVAL);
@@ -667,19 +662,18 @@ linux_sys_rt_sigprocmask(p, v, retval)
 
linux_to_bsd_sigset(ls, bs);
 
-   s = splhigh();
-
+   bs = ~sigcantmask;
switch (SCARG(uap, how)) {
case LINUX_SIG_BLOCK:
-   p-p_sigmask |= bs  ~sigcantmask;
+   atomic_setbits_int(p-p_sigmask, bs);
break;
 
case LINUX_SIG_UNBLOCK:
-   p-p_sigmask = ~bs;
+   atomic_clearbits_int(p-p_sigmask, bs);
break;
 
case LINUX_SIG_SETMASK:
-   p-p_sigmask = bs  ~sigcantmask;
+   p-p_sigmask = bs;
break;
 
default:
@@ -687,8 +681,6 @@ linux_sys_rt_sigprocmask(p, v, retval)
break;
}
 
-   splx(s);
-
return (error);
 }
 
@@ -727,16 +719,13 @@ linux_sys_sigsetmask(p, v, retval)
} */ *uap = v;
linux_old_sigset_t mask;
sigset_t bsdsig;
-   int s;
 
bsd_to_linux_old_sigset(p-p_sigmask, (linux_old_sigset_t *)retval);
 
mask = SCARG(uap, mask);
bsd_to_linux_old_sigset(bsdsig, mask);
 
-   s = splhigh();
p-p_sigmask = bsdsig  ~sigcantmask;
-   splx(s);
 
return (0);
 }
Index: kern/init_sysent.c
===
RCS file: /cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.145
diff -u -p -r1.145 init_sysent.c
--- kern/init_sysent.c  9 Jun 2013 13:10:27 -   1.145
+++ kern/init_sysent.c  20 Jun 2013 10:12:41 -
@@ -136,7 +136,7 @@ struct sysent sysent[] = {
sys_sigaction },/* 46 = sigaction */
{ 0, 0, 0,
sys_getgid },   /* 47 = getgid */
-   { 2, s(struct sys_sigprocmask_args), 0,
+   { 2, s(struct sys_sigprocmask_args), SY_NOLOCK | 0,
sys_sigprocmask },  /* 48 = sigprocmask */
{ 2, s(struct sys_getlogin_args), 0,
sys_getlogin }, /* 49 = getlogin */
Index: kern/kern_sig.c
===
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.152
diff -u -p -r1.152 kern_sig.c
--- kern/kern_sig.c 1 Jun 2013

Re: help X11 performance: make sigprocmask(2) SY_NOLOCK

2013-06-20 Thread Martin Pelikan
  I'm only asking if such a situation can happen, or if there is some
  ensure_this_assignment_is_always_atomic(p-p_sigmask, mask); function
  that I missed.
 
 There isn't one.  Unfortunately, we might need one to support SMP on
 hppa, where we have the emulate atomic operations using a lock.  In
 fact you need to worry about reading as well.  And since ptsignal()
 looks at the p_sigmask of other threads in the process, it is not
 immediately obvious there isn't a problem here.

Does this mean that on a potentially SMP hppa just an unlocked aligned
word load can produce invalid data?  The HP's reference seems to be
'unavailable'...

 I think the fact that only the thread itself can change its sigmask
 means that there isn't an issue here.  But that probably means that
 bothering with atomic_setbits_int/atomic_clearbits_int isn't necessary
 in the first place.

Why was the splhigh() there in the first place?  I thought that on i386
and the like, AND to a memory location isn't atomic (without the LOCK
prefix) and therefore other thread reading the value might read garbage.
Whereas MOV is ok.  Obviously, sane architectures doing LD, AND and ST
are ok as well, as long as the ST is atomic (I don't have any experience
with hppa and random search on the internet suggested that pa-riscs
do have word stores atomically).
--
Martin Pelikan



Re: help X11 performance: make sigprocmask(2) SY_NOLOCK

2013-06-20 Thread Martin Pelikan
 And p_sigmask is copied during fork, so that needs a bit of thought.
 I guess it doesn't matter for the new child, as it isn't running yet
 and therefore can't invoke sigprocmask(2).  And the parent should be
 safe as well, as it is in fork(2) and therefore can't call
 sigprocmask(2) either.

memcpy(p_startcopy) which is, tadaaa, p_sigmask!  From implementations
of memcpy I've seen (usage of SSE or non-temporal instructions to begin
with) it seems that no guarantee about how big a chunk does it copy
atomically can be assumed...

So a multithreaded process doing sigprocmask(2) in one thread and
simultaneously fork(2) in the other?  Not a problem either, because
1003.1 2004 edition says that sigprocmask(2) is equivalent to
pthread_sigmask(3) and the fork only copies the other thread, not
bothering with the sigprocmask-ing one.
I haven't checked the code that thoroughly but this is what POSIX says.

 There's also a couple of additional p-p_sigmask |= ... in kern_sig.c.
 
 All of these need to become atomic_setbits_int/atomic_clearbits_int calls.

Does the third one do the charm?  I haven't compiled the other arch's,
but i386 is running fine.
--
Martin Pelikan


Index: arch/hp300/hp300/trap.c
===
RCS file: /cvs/src/sys/arch/hp300/hp300/trap.c,v
retrieving revision 1.62
diff -u -p -r1.62 trap.c
--- arch/hp300/hp300/trap.c 31 Dec 2012 06:46:13 -  1.62
+++ arch/hp300/hp300/trap.c 20 Jun 2013 12:51:46 -
@@ -321,7 +321,7 @@ dopanic:
i = sigmask(SIGILL);
p-p_sigacts-ps_sigignore = ~i;
p-p_sigacts-ps_sigcatch = ~i;
-   p-p_sigmask = ~i;
+   atomic_clearbits_int(p-p_sigmask, i);
i = SIGILL;
ucode = frame.f_format; /* XXX was ILL_RESAD_FAULT */
typ = ILL_COPROC;
Index: arch/mvme68k/mvme68k/trap.c
===
RCS file: /cvs/src/sys/arch/mvme68k/mvme68k/trap.c,v
retrieving revision 1.77
diff -u -p -r1.77 trap.c
--- arch/mvme68k/mvme68k/trap.c 31 Dec 2012 06:46:13 -  1.77
+++ arch/mvme68k/mvme68k/trap.c 20 Jun 2013 12:51:47 -
@@ -260,7 +260,7 @@ copyfault:
i = sigmask(SIGILL);
p-p_sigacts-ps_sigignore = ~i;
p-p_sigacts-ps_sigcatch = ~i;
-   p-p_sigmask = ~i;
+   atomic_clearbits_int(p-p_sigmask, i);
i = SIGILL;
ucode = frame.f_format; /* XXX was ILL_RESAD_FAULT */
typ = ILL_COPROC;
Index: compat/linux/linux_signal.c
===
RCS file: /cvs/src/sys/compat/linux/linux_signal.c,v
retrieving revision 1.15
diff -u -p -r1.15 linux_signal.c
--- compat/linux/linux_signal.c 19 Jun 2012 11:35:29 -  1.15
+++ compat/linux/linux_signal.c 20 Jun 2013 12:51:49 -
@@ -584,7 +584,6 @@ linux_sys_sigprocmask(p, v, retval)
linux_old_sigset_t ss;
sigset_t bs;
int error = 0;
-   int s;
 
*retval = 0;
 
@@ -604,19 +603,18 @@ linux_sys_sigprocmask(p, v, retval)
 
linux_old_to_bsd_sigset(ss, bs);
 
-   s = splhigh();
-
+   bs = ~sigcantmask;
switch (SCARG(uap, how)) {
case LINUX_SIG_BLOCK:
-   p-p_sigmask |= bs  ~sigcantmask;
+   atomic_setbits_int(p-p_sigmask, bs);
break;
 
case LINUX_SIG_UNBLOCK:
-   p-p_sigmask = ~bs;
+   atomic_clearbits_int(p-p_sigmask, bs);
break;
 
case LINUX_SIG_SETMASK:
-   p-p_sigmask = bs  ~sigcantmask;
+   p-p_sigmask = bs;
break;
 
default:
@@ -624,8 +622,6 @@ linux_sys_sigprocmask(p, v, retval)
break;
}
 
-   splx(s);
-
return (error);
 }
 
@@ -644,7 +640,6 @@ linux_sys_rt_sigprocmask(p, v, retval)
linux_sigset_t ls;
sigset_t bs;
int error = 0;
-   int s;
 
if (SCARG(uap, sigsetsize) != sizeof(linux_sigset_t))
return (EINVAL);
@@ -667,19 +662,18 @@ linux_sys_rt_sigprocmask(p, v, retval)
 
linux_to_bsd_sigset(ls, bs);
 
-   s = splhigh();
-
+   bs = ~sigcantmask;
switch (SCARG(uap, how)) {
case LINUX_SIG_BLOCK:
-   p-p_sigmask |= bs  ~sigcantmask;
+   atomic_setbits_int(p-p_sigmask, bs);
break;
 
case LINUX_SIG_UNBLOCK:
-   p-p_sigmask = ~bs;
+   atomic_clearbits_int(p-p_sigmask, bs);
break;
 
case LINUX_SIG_SETMASK:
-   p-p_sigmask = bs  ~sigcantmask;
+   p-p_sigmask = bs;
break;
 
default:
@@ -687,8 +681,6 @@ linux_sys_rt_sigprocmask(p, v, retval)
break;
}
 
-   splx(s);
-
return (error);
 }
 
@@ -727,16 +719,13 @@ linux_sys_sigsetmask(p, v, retval

help X11 performance: make sigprocmask(2) SY_NOLOCK

2013-06-19 Thread Martin Pelikan
Hi!

Recently, I had to unleash my crappy Atom and decided to run OpenBSD on it.
Then I became irritated with the performance of most X11 applications, and
started poking around:
 - libfreetype.so is quite common
 - it does setjmp (disguised as ft_setjmp) quite a lot
 - setjmp needs to do sigprocmask
 - sigprocmask (I presume) needs the kernel lock.

sigprocmask(2) is quite simple actually, but wasn't marked as NOLOCK,
so I made it so.  And then I made a pointless test to demonstrate my point,
and ran it on my pointless Atom N270 (two threads, one core from 1980s)
as well as in VMware, which is the only 4-core amd64 machine I can get
on right now.  (did I say anything about using virtual machines to measure
things?)

#include setjmp.h

int
main(void)
{
jmp_buf env;
unsigned i;

/* on amd64 this was too slow, so 20M was used instead */
for (i = 0; i  400; ++i) {
setjmp(env);
}
return (0);
}

Before:
Atom, i386, single process, 0m3.80s real 0m1.15s user 0m2.50s system
Atom, i386, two processes,  0m4.81s real 0m1.44s user 0m3.09s system

amd64, single process, i20M,   0m3.35s real 0m0.65s user 0m2.23s system
amd64, two processes, i20M,0m4.48s real 0m0.66s user 0m3.37s system

After:
Atom, i386, single process, 0m2.93s real 0m0.95s user 0m1.35s system
Atom, i386, two processes,  0m3.67s real 0m1.36s user 0m2.15s system

amd64, single process, i20M,   0m2.14s real 0m0.63s user 0m1.12s system
amd64, two processes, i20M,0m2.57s real 0m0.57s user 0m1.55s system


If anyone has a real test with some real-world application using lots
of setjmps, I'd like to see it.  What'd I like to see most, is if does
it work on other architectures and if it is indeed correct.  Although
the numbers suggest quite an improvement, bear in mind this kind of
load does not at all resemble any software actually doing useful stuff.

Any comments?
--
Martin Pelikan


Index: kern/init_sysent.c
===
RCS file: /cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.145
diff -u -p -r1.145 init_sysent.c
--- kern/init_sysent.c  9 Jun 2013 13:10:27 -   1.145
+++ kern/init_sysent.c  19 Jun 2013 09:49:57 -
@@ -136,7 +136,7 @@ struct sysent sysent[] = {
sys_sigaction },/* 46 = sigaction */
{ 0, 0, 0,
sys_getgid },   /* 47 = getgid */
-   { 2, s(struct sys_sigprocmask_args), 0,
+   { 2, s(struct sys_sigprocmask_args), SY_NOLOCK | 0,
sys_sigprocmask },  /* 48 = sigprocmask */
{ 2, s(struct sys_getlogin_args), 0,
sys_getlogin }, /* 49 = getlogin */
Index: kern/kern_sig.c
===
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.152
diff -u -p -r1.152 kern_sig.c
--- kern/kern_sig.c 1 Jun 2013 04:05:26 -   1.152
+++ kern/kern_sig.c 19 Jun 2013 09:49:57 -
@@ -429,28 +429,25 @@ sys_sigprocmask(struct proc *p, void *v,
syscallarg(sigset_t) mask;
} */ *uap = v;
int error = 0;
-   int s;
sigset_t mask;
 
*retval = p-p_sigmask;
-   mask = SCARG(uap, mask);
-   s = splhigh();
+   mask = SCARG(uap, mask) ~ sigcantmask;
 
switch (SCARG(uap, how)) {
case SIG_BLOCK:
-   p-p_sigmask |= mask ~ sigcantmask;
+   atomic_setbits_int(p-p_sigmask, mask);
break;
case SIG_UNBLOCK:
-   p-p_sigmask = ~mask;
+   atomic_clearbits_int(p-p_sigmask, mask);
break;
case SIG_SETMASK:
-   p-p_sigmask = mask ~ sigcantmask;
+   p-p_sigmask = mask;
break;
default:
error = EINVAL;
break;
}
-   splx(s);
return (error);
 }
 
Index: kern/syscalls.master
===
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.133
diff -u -p -r1.133 syscalls.master
--- kern/syscalls.master9 Jun 2013 13:10:19 -   1.133
+++ kern/syscalls.master19 Jun 2013 09:49:57 -
@@ -123,7 +123,7 @@
const struct sigaction *nsa, \
struct sigaction *osa); }
 47 STD { gid_t sys_getgid(void); }
-48 STD { int sys_sigprocmask(int how, sigset_t mask); }
+48 STD NOLOCK  { int sys_sigprocmask(int how, sigset_t mask); }
 49 STD { int sys_getlogin(char *namebuf, u_int namelen); }
 50 STD { int sys_setlogin(const char *namebuf); }
 #ifdef ACCOUNTING



Re: help X11 performance: make sigprocmask(2) SY_NOLOCK

2013-06-19 Thread Martin Pelikan
 If you're right that atomic_{clear,set}bits_int is correct and
 sufficient and actually faster, then all dynamic executables would
 benefit from this speedup (sigprocmask is used in ld.so(1)).

Since on i386 GENERIC these atomic_* things don't emit the LOCK prefix,
performance shouldn't be an issue; I'm actually more worried about this bit:

  -   p-p_sigmask = mask ~ sigcantmask;
  +   p-p_sigmask = mask;

On the right architecture where a word store isn't atomic enough and
with the right compiler that decides to put p_sigmask on an address
ending with 0xFFF with 4k-sized pages, we have two problems already.

I'm only asking if such a situation can happen, or if there is some
ensure_this_assignment_is_always_atomic(p-p_sigmask, mask); function
that I missed.
--
Martin Pelikan



Re: nc(1) shutdown(2) typo

2013-03-19 Thread Martin Pelikan
 wfd is stdin, so doing a shutdown on it will mostly be a noop, right?

Of course you're right.  I was so focused on finding the bug I didn't
look above what the fd is :-(

Are you okay with removing this particular shutdown(2) line?
--
Martin Pelikan



make bpf(4) and tcpdump(8) useful with queueing

2013-03-19 Thread Martin Pelikan
Hi!

Yesterday I got sick and tired of guessing what the enormous traffic in our
default queue was, and filled an item from my wishlist since OpenBSD 4.7.

It works like this:

# tcpdump -Q queuename -ni em0
tcpdump: listening on vic0, queue a, link-type EN10MB
... usual output ...

This mail contains changes required to the kernel, subsequent ones have the 
(most complicated) tcpdump(8) bit and (untested) pcap(3) bit.

Any thoughts?  I find it extremely useful and very small change.  Will test it
on a system with thousands of queues, but don't expect anything bad with it.
--
Martin Pelikan


Index: altq/altq_hfsc.c
===
RCS file: /cvs/src/sys/altq/altq_hfsc.c,v
retrieving revision 1.29
diff -u -p -r1.29 altq_hfsc.c
--- altq/altq_hfsc.c18 Sep 2011 20:34:29 -  1.29
+++ altq/altq_hfsc.c19 Mar 2013 14:54:43 -
@@ -592,6 +592,7 @@ altq_hfsc_enqueue(struct ifaltq *ifq, st
m_freem(m);
return (ENOBUFS);
}
+   m-m_pkthdr.pf.qid = cl-cl_handle;
cl-cl_pktattr = NULL;
}
 
Index: net/bpf.c
===
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.83
diff -u -p -r1.83 bpf.c
--- net/bpf.c   28 Dec 2012 17:52:06 -  1.83
+++ net/bpf.c   19 Mar 2013 14:54:47 -
@@ -849,6 +849,14 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
(BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
break;
 
+   case BIOCGQUEUE:/* get queue */
+   *(u_int *)addr = d-bd_queue;
+   break;
+
+   case BIOCSQUEUE:/* set queue */
+   d-bd_queue = *(u_int *)addr;
+   break;
+
case FIONBIO:   /* Non-blocking I/O */
if (*(int *)addr)
d-bd_rtout = -1;
@@ -1198,6 +1206,8 @@ bpf_mtap(caddr_t arg, struct mbuf *m, u_
for (d = bp-bif_dlist; d != 0; d = d-bd_next) {
++d-bd_rcount;
if ((direction  d-bd_dirfilt) != 0)
+   slen = 0;
+   else if (d-bd_queue  m-m_pkthdr.pf.qid != d-bd_queue)
slen = 0;
else
slen = bpf_filter(d-bd_rfilter, (u_char *)m,
Index: net/bpf.h
===
RCS file: /cvs/src/sys/net/bpf.h,v
retrieving revision 1.43
diff -u -p -r1.43 bpf.h
--- net/bpf.h   26 Mar 2012 19:37:42 -  1.43
+++ net/bpf.h   19 Mar 2013 14:54:48 -
@@ -119,6 +119,8 @@ struct bpf_version {
 #define BIOCGDLTLIST   _IOWR('B',123, struct bpf_dltlist)
 #define BIOCGDIRFILT   _IOR('B',124, u_int)
 #define BIOCSDIRFILT   _IOW('B',125, u_int)
+#define BIOCGQUEUE _IOR('B',126, u_int32_t)
+#define BIOCSQUEUE _IOW('B',127, u_int32_t)
 
 /*
  * Direction filters for BIOCSDIRFILT/BIOCGDIRFILT
Index: net/bpfdesc.h
===
RCS file: /cvs/src/sys/net/bpfdesc.h,v
retrieving revision 1.17
diff -u -p -r1.17 bpfdesc.h
--- net/bpfdesc.h   25 Mar 2006 22:41:47 -  1.17
+++ net/bpfdesc.h   19 Mar 2013 14:54:48 -
@@ -78,6 +78,7 @@ struct bpf_d {
u_char  bd_locked;  /* true if descriptor is locked */
u_char  bd_fildrop; /* true if filtered packets will be 
dropped */
u_char  bd_dirfilt; /* direction filter */
+   u_int   bd_queue;   /* the queue the user wants to watch (0 
== all) */
int bd_hdrcmplt;/* false to fill in src lladdr 
automatically */
int bd_async;   /* non-zero if packet reception should 
generate signal */
int bd_sig; /* signal to send upon packet reception 
*/



Re: make bpf(4) and tcpdump(8) useful with queueing

2013-03-19 Thread Martin Pelikan
 This mail contains changes required to the kernel, subsequent ones have the 
 (most complicated) tcpdump(8) bit and (untested) pcap(3) bit.

tcpdump(8) change follows:


Index: usr.sbin/tcpdump/privsep.c
===
RCS file: /cvs/src/usr.sbin/tcpdump/privsep.c,v
retrieving revision 1.30
diff -u -p -r1.30 privsep.c
--- usr.sbin/tcpdump/privsep.c  22 Sep 2011 09:12:30 -  1.30
+++ usr.sbin/tcpdump/privsep.c  19 Mar 2013 15:09:52 -
@@ -18,6 +18,7 @@
  */
 
 #include sys/types.h
+#include sys/ioctl.h
 #include sys/socket.h
 #include sys/wait.h
 
@@ -125,6 +126,41 @@ static voidimpl_getlines(int);
 static voidtest_state(int, int);
 static voidlogmsg(int, const char *, ...);
 
+
+static u_int32_t
+find_queue(const char *device, const char *queue)
+{
+   int fd;
+   u_int32_t q, total;
+   struct pfioc_altq pa;
+
+   if ((fd = open(/dev/pf, O_RDONLY)) == -1)
+   err(2, can't open pf to get queue ID);
+
+   memset(pa, 0, sizeof pa);
+   if (ioctl(fd, DIOCGETALTQS, pa))
+   err(2, DIOCGETALTQS);
+
+   total = pa.nr;
+   for (q = 0; q  total; ++q) {
+   pa.nr = q;
+   if (ioctl(fd, DIOCGETALTQ, pa))
+   err(2, DIOCGETALTQ, number %u, q);
+   if (pa.altq.qid == 0)
+   continue;
+   if (strncmp(pa.altq.ifname, device, sizeof pa.altq.ifname))
+   continue;
+   if (strncmp(pa.altq.qname, queue, sizeof pa.altq.qname))
+   continue;
+   break;
+   }
+   close(fd);
+   if (q == total)
+   warnx(WARNING: no queue %s on interface %s, choosing all,
+   queue, device);
+   return (q == total ? 0 : pa.altq.qid);
+}
+
 int
 priv_init(int argc, char **argv)
 {
@@ -200,7 +236,7 @@ priv_init(int argc, char **argv)
/* parse the arguments for required options */
opterr = 0;
while ((i = getopt(argc, argv,
-   ac:D:deE:fF:i:lLnNOopqr:s:StT:vw:xXy:Y)) != -1) {
+   ac:D:deE:fF:i:lLnNOopQ:qr:s:StT:vw:xXy:Y)) != -1) {
switch (i) {
case 'n':
nflag++;
@@ -320,8 +356,9 @@ impl_open_bpf(int fd, int *bpfd)
 {
int snaplen, promisc, err;
u_int dlt, dirfilt;
-   char device[IFNAMSIZ];
-   size_t iflen;
+   u_int32_t qid = 0;
+   char device[IFNAMSIZ], queue[PF_QNAME_SIZE];
+   size_t iflen, qlen;
 
logmsg(LOG_DEBUG, [priv]: msg PRIV_OPEN_BPF received);
 
@@ -332,7 +369,10 @@ impl_open_bpf(int fd, int *bpfd)
iflen = read_string(fd, device, sizeof(device), __func__);
if (iflen == 0)
errx(1, Invalid interface size specified);
-   *bpfd = pcap_live(device, snaplen, promisc, dlt, dirfilt);
+   qlen = read_string(fd, queue, sizeof queue, __func__);
+   if (qlen  1)
+   qid = find_queue(device, queue);
+   *bpfd = pcap_live(device, snaplen, promisc, dlt, dirfilt, qid);
err = errno;
if (*bpfd  0)
logmsg(LOG_DEBUG,
Index: usr.sbin/tcpdump/privsep.h
===
RCS file: /cvs/src/usr.sbin/tcpdump/privsep.h,v
retrieving revision 1.7
diff -u -p -r1.7 privsep.h
--- usr.sbin/tcpdump/privsep.h  25 Aug 2009 06:59:17 -  1.7
+++ usr.sbin/tcpdump/privsep.h  19 Mar 2013 15:09:52 -
@@ -47,11 +47,11 @@ int priv_init(int, char **);
 voidpriv_init_done(void);
 
 intsetfilter(int, int, char *);
-intpcap_live(const char *, int, int, u_int, u_int);
+intpcap_live(const char *, int, int, u_int, u_int, u_int32_t);
 
 struct bpf_program *priv_pcap_setfilter(pcap_t *, int, u_int32_t);
 pcap_t *priv_pcap_live(const char *, int, int, int, char *, u_int,
-   u_int);
+   u_int, const char *);
 pcap_t *priv_pcap_offline(const char *, char *);
 
 size_t priv_gethostbyaddr(char *, size_t, int, char *, size_t);
Index: usr.sbin/tcpdump/privsep_pcap.c
===
RCS file: /cvs/src/usr.sbin/tcpdump/privsep_pcap.c,v
retrieving revision 1.17
diff -u -p -r1.17 privsep_pcap.c
--- usr.sbin/tcpdump/privsep_pcap.c 14 Nov 2012 03:33:04 -  1.17
+++ usr.sbin/tcpdump/privsep_pcap.c 19 Mar 2013 15:09:52 -
@@ -173,7 +173,7 @@ priv_pcap_setfilter(pcap_t *hpcap, int o
 /* privileged part of priv_pcap_live */
 int
 pcap_live(const char *device, int snaplen, int promisc, u_int dlt,
-u_int dirfilt)
+u_int dirfilt, u_int32_t qid)
 {
charbpf[sizeof /dev/bpf00];
int fd, n = 0;
@@ -206,6 +206,8 @@ pcap_live(const char *device, int snaple
ioctl(fd, BIOCPROMISC, NULL);
if (ioctl(fd, BIOCSDIRFILT, dirfilt)  0)
goto error;
+   if (qid  ioctl(fd, BIOCSQUEUE, qid)  0)
+   

Re: make bpf(4) and tcpdump(8) useful with queueing

2013-03-19 Thread Martin Pelikan
 This mail contains changes required to the kernel, subsequent ones have the 
 (most complicated) tcpdump(8) bit and (untested) pcap(3) bit.

Just as pcap_setdirection(3), it doesn't have any current user, but it
might be useful to some.


Index: lib/libpcap/Makefile
===
RCS file: /cvs/src/lib/libpcap/Makefile,v
retrieving revision 1.23
diff -u -p -r1.23 Makefile
--- lib/libpcap/Makefile2 Aug 2012 13:38:39 -   1.23
+++ lib/libpcap/Makefile19 Mar 2013 14:25:03 -
@@ -23,6 +23,7 @@ MLINKS=   pcap.3 pcap_open_live.3 pcap.3 p
pcap.3 pcap_datalink_name_to_val.3 pcap.3 pcap_dump_fopen.3 \
pcap.3 pcap_sendpacket.3 pcap.3 pcap_next_ex.3 \
pcap.3 pcap_setdirection.3 pcap.3 pcap_dump_file.3 \
+   pcap.3 pcap_setqueue.3 \
pcap.3 pcap_dump_ftell.3 pcap.3 pcap_fopen_offline.3 \
pcap.3 pcap_dump_flush.3 pcap.3 pcap_create.3 \
pcap.3 pcap_set_snaplen.3 pcap.3 pcap_set_promisc.3 \
Index: lib/libpcap/pcap-bpf.c
===
RCS file: /cvs/src/lib/libpcap/pcap-bpf.c,v
retrieving revision 1.21
diff -u -p -r1.21 pcap-bpf.c
--- lib/libpcap/pcap-bpf.c  25 May 2012 01:58:08 -  1.21
+++ lib/libpcap/pcap-bpf.c  19 Mar 2013 14:25:03 -
@@ -1028,6 +1028,17 @@ pcap_setdirection(pcap_t *p, pcap_direct
 }
 
 int
+pcap_setqueue(pcap_t *p, u_int32_t qid)
+{
+   if (ioctl(p-fd, BIOCSQUEUE, qid)  0) {
+   snprintf(p-errbuf, PCAP_ERRBUF_SIZE, BIOCSQUEUE: %s,
+   pcap_strerror(errno));
+   return (-1);
+   }
+   return (0);
+}
+
+int
 pcap_set_datalink(pcap_t *p, int dlt)
 {
int i;
Index: lib/libpcap/pcap.3
===
RCS file: /cvs/src/lib/libpcap/pcap.3,v
retrieving revision 1.33
diff -u -p -r1.33 pcap.3
--- lib/libpcap/pcap.3  16 Jul 2012 08:55:48 -  1.33
+++ lib/libpcap/pcap.3  19 Mar 2013 14:25:04 -
@@ -62,6 +62,8 @@
 .Ft int
 .Fn pcap_setdirection pcap_t *p pcap_direction_t dir
 .Ft int
+.Fn pcap_setqueue pcap_t *p u_int32_t qid
+.Ft int
 .Fn pcap_datalink pcap_t *p
 .Ft int
 .Fn pcap_snapshot pcap_t *p
@@ -471,6 +473,16 @@ datalink types.
 .Fn pcap_setdirection
 is used to limit the direction that packets must be flowing in order
 to be captured.
+.Pp
+.Fn pcap_setqueue
+is used to only capture packets that are queued on a particular queue.
+The
+.Fa qid
+argument can be easily obtained using
+.Xr ioctl 2
+interface described in the
+.Xr pf 4
+manual page.
 .Pp
 .Fn pcap_list_datalinks
 returns an array of the supported datalink types for an opened live capture
Index: lib/libpcap/pcap.h
===
RCS file: /cvs/src/lib/libpcap/pcap.h,v
retrieving revision 1.15
diff -u -p -r1.15 pcap.h
--- lib/libpcap/pcap.h  25 May 2012 01:58:08 -  1.15
+++ lib/libpcap/pcap.h  19 Mar 2013 14:25:04 -
@@ -187,6 +187,7 @@ voidpcap_breakloop(pcap_t *);
 intpcap_stats(pcap_t *, struct pcap_stat *);
 intpcap_setfilter(pcap_t *, struct bpf_program *);
 intpcap_setdirection(pcap_t *, pcap_direction_t);
+intpcap_setqueue(pcap_t *, u_int32_t);
 intpcap_getnonblock(pcap_t *, char *);
 intpcap_setnonblock(pcap_t *, int, char *);
 void   pcap_perror(pcap_t *, char *);



Re: make bpf(4) and tcpdump(8) useful with queueing

2013-03-19 Thread Martin Pelikan
  +#define BIOCGQUEUE _IOR('B',126, u_int32_t)
  +#define BIOCSQUEUE _IOW('B',127, u_int32_t)
 
 these ioctls can take u_int to be consistent with how you're using
 them.

Sorry everyone.  I wanted to make it all the same type -- u_int32_t,
and obviously forgot it in the kernel part.

Do you think using u_int is better?  I'll change it in the other diff.

OK now?  Anything else?  If you don't tell people what are they doing
wrong, they'll just keep doing it ;-)
--
Martin Pelikan


Index: usr.sbin/tcpdump/privsep.c
===
RCS file: /cvs/src/usr.sbin/tcpdump/privsep.c,v
retrieving revision 1.30
diff -u -p -r1.30 privsep.c
--- usr.sbin/tcpdump/privsep.c  22 Sep 2011 09:12:30 -  1.30
+++ usr.sbin/tcpdump/privsep.c  19 Mar 2013 16:10:42 -
@@ -18,6 +18,7 @@
  */
 
 #include sys/types.h
+#include sys/ioctl.h
 #include sys/socket.h
 #include sys/wait.h
 
@@ -125,6 +126,41 @@ static voidimpl_getlines(int);
 static voidtest_state(int, int);
 static voidlogmsg(int, const char *, ...);
 
+
+static u_int
+find_queue(const char *device, const char *queue)
+{
+   int fd;
+   u_int32_t q, total;
+   struct pfioc_altq pa;
+
+   if ((fd = open(/dev/pf, O_RDONLY)) == -1)
+   err(2, can't open pf to get queue ID);
+
+   memset(pa, 0, sizeof pa);
+   if (ioctl(fd, DIOCGETALTQS, pa))
+   err(2, DIOCGETALTQS);
+
+   total = pa.nr;
+   for (q = 0; q  total; ++q) {
+   pa.nr = q;
+   if (ioctl(fd, DIOCGETALTQ, pa))
+   err(2, DIOCGETALTQ, number %u, q);
+   if (pa.altq.qid == 0)
+   continue;
+   if (strncmp(pa.altq.ifname, device, sizeof pa.altq.ifname))
+   continue;
+   if (strncmp(pa.altq.qname, queue, sizeof pa.altq.qname))
+   continue;
+   break;
+   }
+   close(fd);
+   if (q == total)
+   warnx(WARNING: no queue %s on interface %s, choosing all,
+   queue, device);
+   return (q == total ? 0 : pa.altq.qid);
+}
+
 int
 priv_init(int argc, char **argv)
 {
@@ -200,7 +236,7 @@ priv_init(int argc, char **argv)
/* parse the arguments for required options */
opterr = 0;
while ((i = getopt(argc, argv,
-   ac:D:deE:fF:i:lLnNOopqr:s:StT:vw:xXy:Y)) != -1) {
+   ac:D:deE:fF:i:lLnNOopQ:qr:s:StT:vw:xXy:Y)) != -1) {
switch (i) {
case 'n':
nflag++;
@@ -319,9 +355,9 @@ static void
 impl_open_bpf(int fd, int *bpfd)
 {
int snaplen, promisc, err;
-   u_int dlt, dirfilt;
-   char device[IFNAMSIZ];
-   size_t iflen;
+   u_int dlt, dirfilt, qid = 0;
+   char device[IFNAMSIZ], queue[PF_QNAME_SIZE];
+   size_t iflen, qlen;
 
logmsg(LOG_DEBUG, [priv]: msg PRIV_OPEN_BPF received);
 
@@ -332,7 +368,10 @@ impl_open_bpf(int fd, int *bpfd)
iflen = read_string(fd, device, sizeof(device), __func__);
if (iflen == 0)
errx(1, Invalid interface size specified);
-   *bpfd = pcap_live(device, snaplen, promisc, dlt, dirfilt);
+   qlen = read_string(fd, queue, sizeof queue, __func__);
+   if (qlen  1)
+   qid = find_queue(device, queue);
+   *bpfd = pcap_live(device, snaplen, promisc, dlt, dirfilt, qid);
err = errno;
if (*bpfd  0)
logmsg(LOG_DEBUG,
Index: usr.sbin/tcpdump/privsep.h
===
RCS file: /cvs/src/usr.sbin/tcpdump/privsep.h,v
retrieving revision 1.7
diff -u -p -r1.7 privsep.h
--- usr.sbin/tcpdump/privsep.h  25 Aug 2009 06:59:17 -  1.7
+++ usr.sbin/tcpdump/privsep.h  19 Mar 2013 16:10:42 -
@@ -47,11 +47,11 @@ int priv_init(int, char **);
 voidpriv_init_done(void);
 
 intsetfilter(int, int, char *);
-intpcap_live(const char *, int, int, u_int, u_int);
+intpcap_live(const char *, int, int, u_int, u_int, u_int);
 
 struct bpf_program *priv_pcap_setfilter(pcap_t *, int, u_int32_t);
 pcap_t *priv_pcap_live(const char *, int, int, int, char *, u_int,
-   u_int);
+   u_int, const char *);
 pcap_t *priv_pcap_offline(const char *, char *);
 
 size_t priv_gethostbyaddr(char *, size_t, int, char *, size_t);
Index: usr.sbin/tcpdump/privsep_pcap.c
===
RCS file: /cvs/src/usr.sbin/tcpdump/privsep_pcap.c,v
retrieving revision 1.17
diff -u -p -r1.17 privsep_pcap.c
--- usr.sbin/tcpdump/privsep_pcap.c 14 Nov 2012 03:33:04 -  1.17
+++ usr.sbin/tcpdump/privsep_pcap.c 19 Mar 2013 16:10:42 -
@@ -173,7 +173,7 @@ priv_pcap_setfilter(pcap_t *hpcap, int o
 /* privileged part of priv_pcap_live */
 int
 pcap_live(const char *device, int snaplen, int promisc, u_int dlt,
-u_int dirfilt)
+u_int dirfilt, u_int qid)
 {
char

Re: nc(1) shutdown(2) typo

2013-03-19 Thread Martin Pelikan
  Yes, but it would even be better if there would be an option to get
  the shutdown on EOF behaviour back.
 
  Some servers wait until they see the shutdown from the client to finish
  their work.
 
 woah. Can somebody explain, using very small words, exactly what the
 problem is?

Once upon a time, there was a bug.  It haunted people across systems for
more than ten years, and people have invented workarounds in fear someone
actually depend on the bug.  That is apparently the some servers otto 
is talking about.  I haven't seen any yet, but some bugs can arise
because people are using nc(1) in their scripts.

Now the bug is gone and there are many opinions about what to do.
I wanted to kill it and tell people to fix their software from now on.
That obviously isn't acceptable if the whole world has adopted the
earlier behavior *and* if this would cause more harm than good.


 From reading the ubuntu bug report, their problem comes from adding a
 -q flag very similar to the proposed -N flag, and *that's* what broke.
 Unmodified netcat does not have whatever bug they're talking about.

Unmodified netcat terminates the wrong socket prematurely (on stdin EOF).
That's the problem.  Some people decided to do their own actions without
bothering to inform upstream.

If someone is commenting on a bug with an option *they* added, it's
*their* problem.  Until I see a real reason why their -q is needed here, 
I'm against.  *Our* problem is people might depend on what -N does.
Otto mentioned that and proposed -N.


 For that matter, if this is a real problem, why are we using -N and
 not -q? This seems like a wholly gratuitous difference for no benefit.

Because it does two different things.  -N preserves the old buggy
behavior and -q does (according to their man page) waiting in seconds.

What does 'q' stand for anyway?
--
Martin Pelikan



nc(1) shutdown(2) typo

2013-03-18 Thread Martin Pelikan
Hi!

Theo pointed out an issue with nc(1), as mentioned in

https://groups.google.com/forum/?hl=enfromgroups=#!topic/muc.lists.freebsd.bugs/0yNFZVHClcI

and

https://bugs.launchpad.net/ubuntu/+source/netcat-openbsd/+bug/544935

that was causing people headaches.

For me, this diff (which seems like a typical copy-paste error) does the
job.  At least with echo 7.6.5.4 | nc whois.ripe.net 43.

Any comments?
--
Martin Pelikan


Index: netcat.c
===
RCS file: /cvs/src/usr.bin/nc/netcat.c,v
retrieving revision 1.110
diff -u -p -r1.110 netcat.c
--- netcat.c12 Mar 2013 02:57:37 -  1.110
+++ netcat.c18 Mar 2013 14:07:25 -
@@ -771,7 +771,7 @@ readwrite(int nfd)
if ((n = read(wfd, buf, plen))  0)
return;
else if (n == 0) {
-   shutdown(nfd, SHUT_WR);
+   shutdown(wfd, SHUT_WR);
pfd[1].fd = -1;
pfd[1].events = 0;
} else {



Re: pf: separate counter for failed translations

2013-02-24 Thread Martin Pelikan
 Currently if no port is available for translation, the memory
 counter is increased, which is not particularly descriptive, I'd
 find it helpful to split this off to a separate counter as it
 clearly shows when the default NAT port range causes a problem.
 
 Any comments/OKs? (it's pretty straightforward, but intended for
 post 5.3).

That's a great idea!

Should this go in, please don't forget pfstat as well (haven't tested
it, though) 
--
Martin Pelikan

--- parse.y.old Thu Jan 11 17:01:58 2007
+++ parse.y Sun Feb 24 22:34:06 2013
@@ -133,7 +133,7 @@
short, normalize, memory, bad-timestamp,
congestion, ip-option, proto-cksum,
state-mismatch, state-insert, state-limit,
-   src-limit, synproxy, 0 };
+   src-limit, synproxy, translate, NULL };
int i;
 
$$.type = 0;
--- pfstat.conf.example.old Sun Feb 24 22:35:54 2013
+++ pfstat.conf.example Sun Feb 24 22:37:22 2013
@@ -6,7 +6,7 @@
 # collect
 #   global
 # states entries|searches|inserts|removals [diff]
-# counters match|bad-offset|fragment|...|synproxy [diff]
+# counters match|bad-offset|fragment|...|translate [diff]
 #  (see pfctl -si output, same strings)
 #   interface name pass|block packets|bytes in|out v4|v6 [diff]
 #   queue name passed|dropped|other packets|bytes|number [diff]
@@ -85,6 +85,7 @@
 collect 27 = global counters state-limitdiff
 collect 28 = global counters src-limit  diff
 collect 29 = global counters synproxy   diff
+collect 30 = global counters translate  diff
 
 image /var/www/htdocs/benzedrine.cx/pfstat-errors.jpg {
from 1 days to now



dhcpd sync counter little endian on the wire

2013-02-09 Thread Martin Pelikan
Hi!

Noone reads this, so noone noticed, but it's just weird.
--
Martin Pelikan

Index: sync.c
===
RCS file: /cvs/src/usr.sbin/dhcpd/sync.c,v
retrieving revision 1.10
diff -u -p -r1.10 sync.c
--- sync.c  23 Dec 2010 17:38:04 -  1.10
+++ sync.c  10 Feb 2013 02:07:11 -
@@ -425,7 +425,7 @@ sync_lease(struct lease *lease)
/* Add DHCP sync packet header */
hdr.sh_version = DHCP_SYNC_VERSION;
hdr.sh_af = AF_INET;
-   hdr.sh_counter = sync_counter++;
+   hdr.sh_counter = htonl(sync_counter++);
hdr.sh_length = htons(sizeof(hdr) + sizeof(ld) + sizeof(end));
iov[i].iov_base = hdr;
iov[i].iov_len = sizeof(hdr);



Re: dhclient(8): fix segfault if calloc()/strdup() return NULL

2012-12-16 Thread Martin Pelikan
On Sun, Dec 16, 2012 at 01:01:49PM -0700, Theo de Raadt wrote:
  know exactly where the memory allocation fails since the error messages
 
 I hate xstrdup() and xcalloc() for exactly that reason.  You don't
 know why it failed.

Actually, you can make them macros using all that underscore gcc magic,
like so:

#define XMALLOC(where, howmany) do {\
if (((where) = malloc(howmany)) == NULL) {  \
err(1, %s: malloc %zu (%s:%d)\n, __FUNCTION__,\
howmany, __FILE__, __LINE__);   \
} } while (0)

Your error messages are exact, .data should be kept small (only one copy
of the string + one string per each function + line numbers in .text),
only thing worrying me is that it doesn't really look pretty in the code.

If you wanted the value of 'howmany' in your error message, you should use
%zu as malloc(3) eats size_t, but regular constants will be of type int
and produce warnings.  Playing with __builtin_constant() will get _really_
ugly, though :-(

Or, if you want to play user-friendly, you can pass a string to that
macro, describing what is the software actually trying to do, in English.
--
Martin Pelikan



awk(1) error messages fix

2012-01-17 Thread Martin Pelikan
Hi!

Here's one copy-paste error in awk(1).
CVS log mentions some upstream; anyone have idea what that is? They may
be interested...

--
Martin Pelikan


Index: run.c
===
RCS file: /cvs/src/usr.bin/awk/run.c,v
retrieving revision 1.33
diff -u -p -r1.33 run.c
--- run.c   28 Sep 2011 19:27:18 -  1.33
+++ run.c   17 Jan 2012 13:42:19 -
@@ -1546,7 +1546,7 @@ Cell *bltin(Node **a, int n)  /* builtin 
break;
case FXOR:
if (nextarg == 0) {
-   WARNING(or requires two arguments; returning 0);
+   WARNING(xor requires two arguments; returning 0);
u = 0;
break;
}
@@ -1557,7 +1557,7 @@ Cell *bltin(Node **a, int n)  /* builtin 
break;
case FLSHIFT:
if (nextarg == 0) {
-   WARNING(or requires two arguments; returning 0);
+   WARNING(lshift requires two arguments; returning 0);
u = 0;
break;
}
@@ -1568,7 +1568,7 @@ Cell *bltin(Node **a, int n)  /* builtin 
break;
case FRSHIFT:
if (nextarg == 0) {
-   WARNING(or requires two arguments; returning 0);
+   WARNING(rshift requires two arguments; returning 0);
u = 0;
break;
}



awk(1) rshift/lshift on the borders of int

2012-01-17 Thread Martin Pelikan
I can't say anything more than 'this solved my problem', which was 

$ echo | awk '{ printf %u\n, rshift(int(0xdeff), 24) }' | bc -e
'obase=16' 
FF80

vs.

$ echo | awk '{ printf %u\n, rshift(int(0xdeff), 24) }' | bc -e
'obase=16'
DE

I hope it starts a discussion (at least), but I don't really know what
the correct way is to these problems.

--
Martin Pelikan


Index: run.c
===
RCS file: /cvs/src/usr.bin/awk/run.c,v
retrieving revision 1.33
diff -u -p -r1.33 run.c
--- run.c   28 Sep 2011 19:27:18 -  1.33
+++ run.c   17 Jan 2012 15:01:32 -
@@ -1562,7 +1562,7 @@ Cell *bltin(Node **a, int n)  /* builtin 
break;
}
y = execute(a[1]-nnext);
-   u = ((int)getfval(x))  ((int)getfval(y));
+   u = ((long long)getfval(x))  ((long long)getfval(y));
tempfree(y);
nextarg = nextarg-nnext;
break;
@@ -1573,7 +1573,7 @@ Cell *bltin(Node **a, int n)  /* builtin 
break;
}
y = execute(a[1]-nnext);
-   u = ((int)getfval(x))  ((int)getfval(y));
+   u = ((long long)getfval(x))  ((long long)getfval(y));
tempfree(y);
nextarg = nextarg-nnext;
break;



inet6 constants for addrs

2011-10-09 Thread Martin Pelikan
Hi,

motivated by many devs over the weekend I looked at some of the inet6
code again.  Easy stuff goes first:

--
Martin Pelikan


Index: in6.c
===
RCS file: /cvs/src/sys/netinet6/in6.c,v
retrieving revision 1.93
diff -u -p -r1.93 in6.c
--- in6.c   8 Aug 2011 13:04:35 -   1.93
+++ in6.c   9 Oct 2011 22:34:56 -
@@ -1046,10 +1046,9 @@ in6_update_ifa(struct ifnet *ifp, struct
bzero(llsol, sizeof(llsol));
llsol.sin6_family = AF_INET6;
llsol.sin6_len = sizeof(llsol);
-   llsol.sin6_addr.s6_addr16[0] = htons(0xff02);
+   llsol.sin6_addr.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
llsol.sin6_addr.s6_addr16[1] = htons(ifp-if_index);
-   llsol.sin6_addr.s6_addr32[1] = 0;
-   llsol.sin6_addr.s6_addr32[2] = htonl(1);
+   llsol.sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_ONE;
llsol.sin6_addr.s6_addr32[3] =
ifra-ifra_addr.sin6_addr.s6_addr32[3];
llsol.sin6_addr.s6_addr8[12] = 0xff;
Index: in6_ifattach.c
===
RCS file: /cvs/src/sys/netinet6/in6_ifattach.c,v
retrieving revision 1.51
diff -u -p -r1.51 in6_ifattach.c
--- in6_ifattach.c  6 Apr 2010 14:12:10 -   1.51
+++ in6_ifattach.c  9 Oct 2011 22:34:56 -
@@ -343,12 +343,12 @@ in6_ifattach_linklocal(struct ifnet *ifp
 
ifra.ifra_addr.sin6_family = AF_INET6;
ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
-   ifra.ifra_addr.sin6_addr.s6_addr16[0] = htons(0xfe80);
+   ifra.ifra_addr.sin6_addr.s6_addr16[0] = IPV6_ADDR_INT16_ULL;
ifra.ifra_addr.sin6_addr.s6_addr16[1] = htons(ifp-if_index);
ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0;
if ((ifp-if_flags  IFF_LOOPBACK) != 0) {
ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0;
-   ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1);
+   ifra.ifra_addr.sin6_addr.s6_addr32[3] = IPV6_ADDR_INT32_ONE;
} else {
if (get_ifid(ifp, altifp, ifra.ifra_addr.sin6_addr) != 0) {
nd6log((LOG_ERR,
@@ -553,9 +553,9 @@ in6_nigroup(struct ifnet *ifp, const cha
bzero(sa6, sizeof(*sa6));
sa6-sin6_family = AF_INET6;
sa6-sin6_len = sizeof(*sa6);
-   sa6-sin6_addr.s6_addr16[0] = htons(0xff02);
+   sa6-sin6_addr.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
sa6-sin6_addr.s6_addr16[1] = htons(ifp-if_index);
-   sa6-sin6_addr.s6_addr8[11] = 2;
+   sa6-sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_TWO;
bcopy(digest, sa6-sin6_addr.s6_addr32[3],
sizeof(sa6-sin6_addr.s6_addr32[3]));
 
Index: nd6.c
===
RCS file: /cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.87
diff -u -p -r1.87 nd6.c
--- nd6.c   17 Jun 2011 07:06:47 -  1.87
+++ nd6.c   9 Oct 2011 22:34:56 -
@@ -1222,10 +1222,10 @@ nd6_rtrequest(int req, struct rtentry *r
int error;
 
llsol = SIN6(rt_key(rt))-sin6_addr;
-   llsol.s6_addr16[0] = htons(0xff02);
+   llsol.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
llsol.s6_addr16[1] = htons(ifp-if_index);
llsol.s6_addr32[1] = 0;
-   llsol.s6_addr32[2] = htonl(1);
+   llsol.s6_addr32[2] = IPV6_ADDR_INT32_ONE;
llsol.s6_addr8[12] = 0xff;
 
if (in6_addmulti(llsol, ifp, error)) {
@@ -1247,10 +1247,10 @@ nd6_rtrequest(int req, struct rtentry *r
struct in6_multi *in6m;
 
llsol = SIN6(rt_key(rt))-sin6_addr;
-   llsol.s6_addr16[0] = htons(0xff02);
+   llsol.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
llsol.s6_addr16[1] = htons(ifp-if_index);
llsol.s6_addr32[1] = 0;
-   llsol.s6_addr32[2] = htonl(1);
+   llsol.s6_addr32[2] = IPV6_ADDR_INT32_ONE;
llsol.s6_addr8[12] = 0xff;
 
IN6_LOOKUP_MULTI(llsol, ifp, in6m);



inet6 address union 64b access?

2011-10-09 Thread Martin Pelikan
Hi,

would using this ever help anything? On i386 the results are practically
identical (old version doing mov, cmp, mov, cmp vs. new one doing 4 movs
and 2 xors + or + test, perform about the same), but on amd64 the cmpq
version is about 20% faster. I don't know about any other architectures.

If there are some ok's, I could start rewriting stuff in the forwarding
path (like pf_addr_compare).
If there is a reason to keep them all 32bit, it might as well get
documented :-)
--
Martin Pelikan


Index: in6.h
===
RCS file: /cvs/src/sys/netinet6/in6.h,v
retrieving revision 1.53
diff -u -p -r1.53 in6.h
--- in6.h   2 May 2011 13:48:38 -   1.53
+++ in6.h   9 Oct 2011 23:18:16 -
@@ -118,6 +118,7 @@ struct in6_addr {
u_int8_t   __u6_addr8[16];
u_int16_t  __u6_addr16[8];
u_int32_t  __u6_addr32[4];
+   u_int64_t  __u6_addr64[2];
} __u6_addr;/* 128-bit IP6 address */
 };
 
@@ -126,6 +127,7 @@ struct in6_addr {
 #define s6_addr8  __u6_addr.__u6_addr8
 #define s6_addr16 __u6_addr.__u6_addr16
 #define s6_addr32 __u6_addr.__u6_addr32
+#define s6_addr64 __u6_addr.__u6_addr64
 #endif
 
 #define INET6_ADDRSTRLEN   46



Re: device IDs from Gigabyte H61 board

2011-09-04 Thread Martin Pelikan
On Sun, Sep 04, 2011 at 09:50:26PM +1000, Jonathan Gray wrote:
 The H61 LPC correction is fine but do you actually have
 this 0x1c51 device?  It doesn't appear in the Intel 6 series
 datasheets, perhaps just stick with the correction.

No, I don't have that one. I didn't know Intel releases this stuff
officially, so thanks for the info.


On Sun, Sep 04, 2011 at 02:05:42PM +0200, Mark Kettenis wrote:
   product INTEL2 RMH 0x0020  Rate Matching Hub
  +product INTEL2 RMH_SANDYBRIDGE 0x0024  Rate Matching Hub
 
 The hub is part of the PCH rather than the CPU, so the Sandybridge
 codename doesn't apply.  I'll see if I can come up with a better
 naming scheme.

What I meant was 'the one found on chipset made for Sandy Bridges'.
RMH_1 and RMH_2 as suggested by jsg is probably better idea, as I don't
know their existence on chipsets for sure.
--
Martin Pelikan


Index: dev/usb/usbdevs
===
RCS file: /cvs/src/sys/dev/usb/usbdevs,v
retrieving revision 1.553
diff -u -p -r1.553 usbdevs
--- dev/usb/usbdevs 29 Aug 2011 10:51:18 -  1.553
+++ dev/usb/usbdevs 4 Sep 2011 12:55:38 -
@@ -2049,7 +2049,8 @@ product INSYSTEM USBCABLE 0x081a  USB cab
 product INSYSTEM ADAPTERV2 0x5701  USB Storage Adapter V2
 
 /* Intel products */
-product INTEL2 RMH 0x0020  Rate Matching Hub
+product INTEL2 RMH_1   0x0020  Rate Matching Hub
+product INTEL2 RMH_2   0x0024  Rate Matching Hub
 product INTEL EASYPC_CAMERA0x0110  EasyPC
 product INTEL AP3100x0200  AP310 AnyPoint II
 product INTEL I2011B   0x  Wireless 2011B



device IDs from Gigabyte H61 board

2011-09-03 Thread Martin Pelikan
Hi!

I'm working with Gigabyte H61M-S2V-B3 right now. Graphics built in CPU
supports at most 800x600 in X.org, but the rest of the machine seems
fine. Dmesg at dmesg@ and at NYCBUG database.

However, these are missing (fixed with the aid of UCW's database).

--
Martin Pelikan


Index: dev/pci/pcidevs
===
RCS file: /cvs/src/sys/dev/pci/pcidevs,v
retrieving revision 1.1618
diff -u -p -r1.1618 pcidevs
--- dev/pci/pcidevs 30 Aug 2011 15:57:10 -  1.1618
+++ dev/pci/pcidevs 3 Sep 2011 23:04:45 -
@@ -2603,10 +2603,11 @@ product INTEL QS67_LPC  0x1c4d  QS67 LPC
 product INTEL Q67_LPC  0x1c4e  Q67 LPC
 product INTEL QM67_LPC 0x1c4f  QM67 LPC
 product INTEL B65_LPC  0x1c50  B65 LPC
-product INTEL H61_LPC  0x1c51  H61 LPC
+product INTEL 6SERIES_LPC  0x1c51  6 Series LPC
 product INTEL C202_LPC 0x1c52  C202 LPC
 product INTEL C204_LPC 0x1c54  C204 LPC
 product INTEL C206_LPC 0x1c56  C206 LPC
+product INTEL H61_LPC  0x1c5c  H61 LPC
 product INTEL 82801AA_LPC  0x2410  82801AA LPC
 product INTEL 82801AA_IDE  0x2411  82801AA IDE
 product INTEL 82801AA_USB  0x2412  82801AA USB
Index: dev/usb/usbdevs
===
RCS file: /cvs/src/sys/dev/usb/usbdevs,v
retrieving revision 1.553
diff -u -p -r1.553 usbdevs
--- dev/usb/usbdevs 29 Aug 2011 10:51:18 -  1.553
+++ dev/usb/usbdevs 3 Sep 2011 23:04:46 -
@@ -2050,6 +2050,7 @@ product INSYSTEM ADAPTERV20x5701  USB St
 
 /* Intel products */
 product INTEL2 RMH 0x0020  Rate Matching Hub
+product INTEL2 RMH_SANDYBRIDGE 0x0024  Rate Matching Hub
 product INTEL EASYPC_CAMERA0x0110  EasyPC
 product INTEL AP3100x0200  AP310 AnyPoint II
 product INTEL I2011B   0x  Wireless 2011B



disklabel(8) typo

2011-09-03 Thread Martin Pelikan
Index: disklabel.8
===
RCS file: /cvs/src/sbin/disklabel/disklabel.8,v
retrieving revision 1.103
diff -u -p -r1.103 disklabel.8
--- disklabel.8 5 Jun 2011 11:57:17 -   1.103
+++ disklabel.8 3 Sep 2011 23:45:26 -
@@ -163,7 +163,7 @@ Write entries to
 in
 .Xr fstab 5
 format for any partitions for which mount point information is known.
-The entries will written using disklabel UIDs.
+The entries will be written using disklabel UIDs.
 The
 .Fl F
 flag is only valid when used in conjunction with the



altq oddities

2011-08-24 Thread Martin Pelikan
Hi,
this is just another bunch of typos, but it breaks any attempts to write
altq discipline independent macros, for functions using statistics.
giovanni pointed out net/pfstat uses this too, so if you know of other
ports, either let me know or fix them.

Anyone willing to commit?
Thanks.
--
Martin Pelikan

Index: sbin/pfctl/pfctl_qstats.c
===
RCS file: /cvs/src/sbin/pfctl/pfctl_qstats.c,v
retrieving revision 1.32
diff -u -p -r1.32 pfctl_qstats.c
--- sbin/pfctl/pfctl_qstats.c   4 Jul 2011 22:49:03 -   1.32
+++ sbin/pfctl/pfctl_qstats.c   25 Aug 2011 00:29:20 -
@@ -297,7 +297,7 @@ print_cbqstats(struct queue_stats cur)
(unsigned long long)cur.data.cbq_stats.drop_cnt.packets,
(unsigned long long)cur.data.cbq_stats.drop_cnt.bytes);
printf(  [ qlength: %3d/%3d  borrows: %6u  suspends: %6u ]\n,
-   cur.data.cbq_stats.qcnt, cur.data.cbq_stats.qmax,
+   cur.data.cbq_stats.qlength, cur.data.cbq_stats.qmax,
cur.data.cbq_stats.borrows, cur.data.cbq_stats.delays);
 
if (cur.avgn  2)
@@ -313,10 +313,10 @@ print_priqstats(struct queue_stats cur)
 {
printf(  [ pkts: %10llu  bytes: %10llu  
dropped pkts: %6llu bytes: %6llu ]\n,
-   (unsigned long long)cur.data.priq_stats.xmitcnt.packets,
-   (unsigned long long)cur.data.priq_stats.xmitcnt.bytes,
-   (unsigned long long)cur.data.priq_stats.dropcnt.packets,
-   (unsigned long long)cur.data.priq_stats.dropcnt.bytes);
+   (unsigned long long)cur.data.priq_stats.xmit_cnt.packets,
+   (unsigned long long)cur.data.priq_stats.xmit_cnt.bytes,
+   (unsigned long long)cur.data.priq_stats.drop_cnt.packets,
+   (unsigned long long)cur.data.priq_stats.drop_cnt.bytes);
printf(  [ qlength: %3d/%3d ]\n,
cur.data.priq_stats.qlength, cur.data.priq_stats.qlimit);
 
@@ -381,8 +381,8 @@ update_avg(struct pf_altq_node *a)
p = qs-data.cbq_stats.xmit_cnt.packets;
break;
case ALTQT_PRIQ:
-   b = qs-data.priq_stats.xmitcnt.bytes;
-   p = qs-data.priq_stats.xmitcnt.packets;
+   b = qs-data.priq_stats.xmit_cnt.bytes;
+   p = qs-data.priq_stats.xmit_cnt.packets;
break;
case ALTQT_HFSC:
b = qs-data.hfsc_stats.xmit_cnt.bytes;
Index: sys/altq/altq_cbq.c
===
RCS file: /cvs/src/sys/altq/altq_cbq.c,v
retrieving revision 1.26
diff -u -p -r1.26 altq_cbq.c
--- sys/altq/altq_cbq.c 3 Jul 2011 23:59:43 -   1.26
+++ sys/altq/altq_cbq.c 25 Aug 2011 00:29:20 -
@@ -173,7 +173,7 @@ get_class_stats(class_stats_t *statsp, s
statsp-qmax= qlimit(cl-q_);
statsp-ns_per_byte = cl-ns_per_byte_;
statsp-wrr_allot   = cl-w_allotment_;
-   statsp-qcnt= qlen(cl-q_);
+   statsp-qlength = qlen(cl-q_);
statsp-avgidle = cl-avgidle_;
 
statsp-qtype   = qtype(cl-q_);
Index: sys/altq/altq_cbq.h
===
RCS file: /cvs/src/sys/altq/altq_cbq.h,v
retrieving revision 1.13
diff -u -p -r1.13 altq_cbq.h
--- sys/altq/altq_cbq.h 4 Jul 2011 01:07:43 -   1.13
+++ sys/altq/altq_cbq.h 25 Aug 2011 00:29:20 -
@@ -81,7 +81,7 @@ typedef struct _cbq_class_stats_ {
int ns_per_byte;
int wrr_allot;
 
-   int qcnt;   /* # packets in queue */
+   int qlength;/* # packets in queue */
int avgidle;
 
/* red and rio related info */
Index: sys/altq/altq_priq.c
===
RCS file: /cvs/src/sys/altq/altq_priq.c,v
retrieving revision 1.24
diff -u -p -r1.24 altq_priq.c
--- sys/altq/altq_priq.c3 Jul 2011 23:59:43 -   1.24
+++ sys/altq/altq_priq.c25 Aug 2011 00:29:21 -
@@ -477,8 +477,8 @@ get_class_stats(struct priq_classstats *
sp-qlength = qlen(cl-cl_q);
sp-qlimit = qlimit(cl-cl_q);
sp-period = cl-cl_period;
-   sp-xmitcnt = cl-cl_xmitcnt;
-   sp-dropcnt = cl-cl_dropcnt;
+   sp-xmit_cnt = cl-cl_xmitcnt;
+   sp-drop_cnt = cl-cl_dropcnt;
 
sp-qtype = qtype(cl-cl_q);
 #ifdef ALTQ_RED
Index: sys/altq/altq_priq.h
===
RCS file: /cvs/src/sys/altq/altq_priq.h,v
retrieving revision 1.8
diff -u -p -r1.8 altq_priq.h
--- sys/altq/altq_priq.h3 Jul 2011 23:48:41 -   1.8
+++ sys/altq/altq_priq.h25 Aug 2011 00:29:21 -
@@ -54,8 +54,8 @@ struct priq_classstats {
u_int   qlength;
u_int   qlimit;
u_int   period;
-   struct pktcntr  xmitcnt

unbreak ps -N/-M

2011-08-18 Thread Martin Pelikan
Hi!

I wanted to look at the process table of a crashed kernel from a dump.
ps(1) then segfaulted.

It seems that

 1) pr-ps_pgrp is kernel's (it's being kvm_read earlier in libkvm)
and so is ps_session
 2) p_comm and s_login are part of their structs and not pointers,
therefore kvm_read() a. k. a. copy_str() doesn't work on them.

I couldn't fix argv to display properly, but this is already better than
a segfault. Tested only on i386.

Reproduce:
 1) C-M-Esc, boot crash
 2) ps -N /var/crash/bsd.? -M /var/crash/bsd.?.core
 3) when trying the diff, don't forget to recompile libkvm AND ps

Anyone has anything to say or is willing to commit?

--
Martin Pelikan


Index: sys/sysctl.h
===
RCS file: /cvs/src/sys/sys/sysctl.h,v
retrieving revision 1.116
diff -u -p -r1.116 sysctl.h
--- sys/sysctl.h8 Jul 2011 18:38:55 -   1.116
+++ sys/sysctl.h18 Aug 2011 11:16:35 -
@@ -482,7 +482,7 @@ do {
\
(kp)-p_limit = PTRTOINT64((pr)-ps_limit); \
(kp)-p_vmspace = PTRTOINT64((p)-p_vmspace);   \
(kp)-p_sigacts = PTRTOINT64((p)-p_sigacts);   \
-   (kp)-p_sess = PTRTOINT64((pr)-ps_session);\
+   (kp)-p_sess = PTRTOINT64(sess);\
(kp)-p_ru = PTRTOINT64((p)-p_ru); \
\
(kp)-p_exitsig = (p)-p_exitsig;   \
@@ -528,11 +528,11 @@ do {  
\
(kp)-p_xstat = (p)-p_xstat;   \
(kp)-p_acflag = (p)-p_acflag; \
\
-   /* XXX depends on p_emul being an array and not a pointer */\
+   /* p_emul is a pointer and not an array */  \
copy_str((kp)-p_emul, (char *)(p)-p_emul +\
offsetof(struct emul, e_name), sizeof((kp)-p_emul));   \
-   copy_str((kp)-p_comm, (p)-p_comm, sizeof((kp)-p_comm));  \
-   copy_str((kp)-p_login, (sess)-s_login,\
+   (void) memcpy((kp)-p_comm, (p)-p_comm, sizeof((kp)-p_comm)); \
+   (void) memcpy((kp)-p_login, (sess)-s_login,   \
MIN(sizeof((kp)-p_login) - 1, sizeof((sess)-s_login)));   \
\
if ((sess)-s_ttyvp)\



kvm_read(3) typographic fix

2011-08-18 Thread Martin Pelikan
Hi,

kvm_open(3) is a cross-reference, not a function call with 3.
Plus, why do we mention the same page four times?

--
Martin Pelikan

Index: lib/libkvm/kvm_read.3
===
RCS file: /cvs/src/lib/libkvm/kvm_read.3,v
retrieving revision 1.7
diff -u -p -r1.7 kvm_read.3
--- lib/libkvm/kvm_read.3   31 May 2007 19:19:35 -  1.7
+++ lib/libkvm/kvm_read.3   18 Aug 2011 12:16:19 -
@@ -55,9 +55,7 @@ and
 functions are used to read and write kernel virtual memory (or a crash
 dump file).
 See
-.Fn kvm_open 3
-or
-.Fn kvm_openfiles 3
+.Xr kvm_open 3
 for information regarding opening kernel virtual memory and crash dumps.
 .Pp
 The



impossible panic() in ifafree()

2011-07-22 Thread Martin Pelikan
Hi,

this panic cannot possibly happen. The question is, whether to move it up
to the macro that is used in the code, or zap it entirely.
The whole logic in these two seems pretty ugly, but inet6 crashed when I 
tried to make it look prettier and more uniform.  And again, until 5.0 is
out, this might just sit here, or wait on someone's todo list.

--
Martin Pelikan

Index: net/if.h
===
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.128
diff -u -p -r1.128 if.h
--- net/if.h8 Jul 2011 18:48:51 -   1.128
+++ net/if.h22 Jul 2011 12:14:01 -
@@ -696,6 +696,8 @@ __END_DECLS
 #ifdef _KERNEL
 #defineIFAFREE(ifa) \
 do { \
+   if (ifa == NULL) \
+   panic(IFAFREE); \
if ((ifa)-ifa_refcnt = 0) \
ifafree(ifa); \
else \
Index: net/route.c
===
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.131
diff -u -p -r1.131 route.c
--- net/route.c 4 Jul 2011 04:29:17 -   1.131
+++ net/route.c 22 Jul 2011 12:14:01 -
@@ -411,8 +411,6 @@ rtfree(struct rtentry *rt)
 void
 ifafree(struct ifaddr *ifa)
 {
-   if (ifa == NULL)
-   panic(ifafree);
if (ifa-ifa_refcnt == 0)
free(ifa, M_IFADDR);
else



rtableid typo across kernel

2011-07-22 Thread Martin Pelikan
Hi,

ugly typo; anyone thinks this should survive to the upcoming release?

--
Martin Pelikan

Index: net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.764
diff -u -p -r1.764 pf.c
--- net/pf.c9 Jul 2011 17:42:19 -   1.764
+++ net/pf.c22 Jul 2011 11:45:45 -
@@ -2592,7 +2592,7 @@ pf_get_mss(struct mbuf *m, int off, u_in
 }
 
 u_int16_t
-pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtabelid, u_int16_t 
offer)
+pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t 
offer)
 {
 #ifdef INET
struct sockaddr_in  *dst;
@@ -2615,7 +2615,7 @@ pf_calc_mss(struct pf_addr *addr, sa_fam
dst-sin_family = AF_INET;
dst-sin_len = sizeof(*dst);
dst-sin_addr = addr-v4;
-   ro.ro_tableid = rtabelid;
+   ro.ro_tableid = rtableid;
rtalloc_noclone(ro);
rt = ro.ro_rt;
break;
@@ -2628,7 +2628,7 @@ pf_calc_mss(struct pf_addr *addr, sa_fam
dst6-sin6_family = AF_INET6;
dst6-sin6_len = sizeof(*dst6);
dst6-sin6_addr = addr-v6;
-   ro6.ro_tableid = rtabelid;
+   ro6.ro_tableid = rtableid;
rtalloc_noclone((struct route *)ro6);
rt = ro6.ro_rt;
break;
Index: net/radix.c
===
RCS file: /cvs/src/sys/net/radix.c,v
retrieving revision 1.28
diff -u -p -r1.28 radix.c
--- net/radix.c 22 Aug 2010 17:02:04 -  1.28
+++ net/radix.c 22 Jul 2011 11:45:45 -
@@ -986,7 +986,7 @@ rn_walktree(struct radix_node_head *h, i
while ((rn = base) != NULL) {
base = rn-rn_dupedkey;
if (!(rn-rn_flags  RNF_ROOT) 
-   (error = (*f)(rn, w, h-rnh_rtabelid)))
+   (error = (*f)(rn, w, h-rnh_rtableid)))
return (error);
}
rn = next;
Index: net/radix.h
===
RCS file: /cvs/src/sys/net/radix.h,v
retrieving revision 1.16
diff -u -p -r1.16 radix.h
--- net/radix.h 28 Jun 2010 18:50:37 -  1.16
+++ net/radix.h 22 Jul 2011 11:45:45 -
@@ -128,7 +128,7 @@ struct radix_node_head {
 int (*)(struct radix_node *, void *, u_int), void *);
struct  radix_node rnh_nodes[3];/* empty tree for common case */
int rnh_multipath;  /* multipath? */
-   u_int   rnh_rtabelid;
+   u_int   rnh_rtableid;
 };
 
 #ifdef _KERNEL
Index: net/route.c
===
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.131
diff -u -p -r1.131 route.c
--- net/route.c 4 Jul 2011 04:29:17 -   1.131
+++ net/route.c 22 Jul 2011 11:45:46 -
@@ -197,7 +197,7 @@ rtable_init(struct radix_node_head ***ta
 
for (i = 0; i  rtafidx_max; i++) {
if ((*table)[i] != NULL)
-   (*table)[i]-rnh_rtabelid = id;
+   (*table)[i]-rnh_rtableid = id;
}
 
return (0);



Re: ospf6d crashes on interface add

2011-06-01 Thread Martin Pelikan
Hi!

So my last attempt wasn't met with much appreciation, so I'll slow down:

- when I add a vlan(4), vether(4) or something, my ospf6d dies
- I don't like that
- therefore, with new release comes new diff
- this time I tried to make it as small as possible, in separate parts

It doesn't fix departures (if_leave_group jumps on a non-existing
interface), but I guess people don't remove interfaces that often. 
I will rewrite and post the rest of the big one if someone shows
interest in this bit.

--
Martin Pelikan


Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/kroute.c,v
retrieving revision 1.30
diff -u -p -r1.30 kroute.c
--- kroute.c7 Mar 2011 07:43:02 -   1.30
+++ kroute.c1 Jun 2011 20:36:31 -
@@ -973,15 +973,22 @@ if_announce(void *msg)
if ((iface = if_new(ifan-ifan_index, ifan-ifan_name)) == NULL)
fatal(if_announce failed);
iface-cflags |= F_IFACE_AVAIL;
+   main_imsg_compose_rde(IMSG_IFADD, 0,
+   iface, sizeof(struct iface));
+   main_imsg_compose_ospfe(IMSG_IFADD, 0,
+   iface, sizeof(struct iface));
break;
case IFAN_DEPARTURE:
-   iface = if_find(ifan-ifan_index);
-   if (iface-cflags  F_IFACE_CONFIGURED) {
-   main_imsg_compose_rde(IMSG_IFDELETE, 0,
-   iface-ifindex, sizeof(iface-ifindex));
-   main_imsg_compose_ospfe(IMSG_IFDELETE, 0,
-   iface-ifindex, sizeof(iface-ifindex));
+   if ((iface = if_find(ifan-ifan_index)) == NULL)  {
+   log_warn(someone announced departure of non-existing 
+   interface %s - possible race condition?,
+   ifan-ifan_name);
+   return;
}
+   main_imsg_compose_rde(IMSG_IFDELETE, 0,
+   iface-ifindex, sizeof(iface-ifindex));
+   main_imsg_compose_ospfe(IMSG_IFDELETE, 0,
+   iface-ifindex, sizeof(iface-ifindex));
if_del(iface);
break;
}
Index: ospfe.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/ospfe.c,v
retrieving revision 1.34
diff -u -p -r1.34 ospfe.c
--- ospfe.c 2 May 2011 09:24:00 -   1.34
+++ ospfe.c 1 Jun 2011 20:36:31 -
@@ -299,16 +299,16 @@ ospfe_dispatch_main(int fd, short event,
}
break;
case IMSG_IFADD:
-   if ((iface = malloc(sizeof(struct iface))) == NULL)
-   fatal(NULL);
-   memcpy(iface, imsg.data, sizeof(struct iface));
+   ifp = imsg.data;
+   if ((iface = if_new(ifp-ifindex, ifp-name)) == NULL)
+   fatalx(IFADD in ospfe);
 
-   LIST_INIT(iface-nbr_list);
-   TAILQ_INIT(iface-ls_ack_list);
-   RB_INIT(iface-lsa_tree);
+   /* ifaces from config belong to some area */
+   if ((iface-cflags  F_IFACE_CONFIGURED) 
+   (area = area_find(oeconf, iface-area_id)) != NULL)
+   LIST_INSERT_HEAD(area-iface_list, iface,
+   entry);
 
-   area = area_find(oeconf, iface-area_id);
-   LIST_INSERT_HEAD(area-iface_list, iface, entry);
break;
case IMSG_IFDELETE:
if (imsg.hdr.len != IMSG_HEADER_SIZE +
Index: rde.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/rde.c,v
retrieving revision 1.52
diff -u -p -r1.52 rde.c
--- rde.c   5 May 2011 15:58:02 -   1.52
+++ rde.c   1 Jun 2011 20:36:31 -
@@ -623,7 +623,7 @@ rde_dispatch_parent(int fd, short event,
 {
static struct area  *narea;
struct area *area;
-   struct iface*iface;
+   struct iface*iface, *ifp;
struct ifaddrchange *ifc;
struct iface_addr   *ia, *nia;
struct imsg  imsg;
@@ -710,16 +710,16 @@ rde_dispatch_parent(int fd, short event,
0, -1, kr, sizeof(kr));
break;
case IMSG_IFADD:
-   if ((iface = malloc(sizeof(struct iface))) == NULL)
-   fatal(NULL);
-   memcpy(iface, imsg.data, sizeof(struct iface));
-
-   LIST_INIT(iface-nbr_list);
-   TAILQ_INIT(iface-ls_ack_list);
-   RB_INIT(iface-lsa_tree);
+   ifp = imsg.data

pf: overload rules can cause crashes

2011-05-15 Thread Martin Pelikan
Hello tech@

I got a GPF on OpenBSD/amd64 4.8-stable GENERIC.MP from Mar 22 17:42:14.
The only thing I did to it was bumping HFSC_MAX_CLASSES from 64 to 1024.
It happened a few seconds after the ruleset reload (shell script running
pfctl -f /etc/pf.conf at the end)


kernel: protection fault trap, code=0
Stopped at  rn_match+0x1b: movl 0x18(%rsi),%ecx
ddb{0} trace
rn_match() at rn_match+0x1b
pfr_match_addr() at pfr_match_addr+0xcb
pf_test_rule() at pf_test_rule+0x502
pf_test() at pf_test+0x10eb
ip_output() at ip_output+0x416
ip_forward() at ip_forward+0x16f
ipv4_input() at ipv4_input+0x66d
ipintr() at ipintr+0x51
Xsoftnet() at Xsoftnet+0x4a
--- interrupt ---
end trace frame: 0x0, count: -9
0x8:

onproc was idle0

ddb{0} sh reg
ds  0x30
es  0x101
fs  0x49b8
gs  0
rdi 0x80cf3700  pfr_sin
rsi 0xdead0005deadbeef
rbp 0x8000154b6800
rbx 0xfe807df1c040
rdx 0x2
rcx 0x8000154b6a90
rax 0x60330a0a
r8  0x8000154b6968
r9  0xfe807d2dc648
r10 0
r11 0x8000154b6a70
r12 0xfe8071756548
r13 0x2
r14 0x80193e00
r15 0x80cf3700  pfr_sin
rip 0x803190bb  rn_match+0x1b
cs  0x8
rflags  0x10282
rsp 0x8000154b67c8
ss  0x10

The RSI quite reminds me the ifindex embedded in link-local IPv6 addr :-)

pfr_match_addr() was so kind it saved me an IP address in RAX
- 10.10.51.96 is our well-known spammer. That pointed me to this thing
we had in pf.conf: (I remember hitting pfctl grammar limits back then)

anchor spam on $ext_if proto tcp from ! smtp-white \
to ! $our_smtp_server port 25 {
block quick from spammers
# something because of ABUSE my friend set up for him
block log(all, to pflog1) quick to 62.67.240.16
block log(all, to pflog1) quick to 62.67.241.15
block log(all, to pflog1) quick to 62.67.194.35
pass out quick from ! $our_smtp_server keep state   \
(max-src-conn-rate 5/60 overload spammers)\
queue (servers, lowdelay) nat-to $masq_addr
}

The table spammers is not defined anywhere, because pfctl grammar won't
let me (or it complains about namespace collision, if defined 'global').

After a couple of days of poking at the code I went through all calls of
pfr_destroy_ktable() and finally found the responsible one.  The ktable
reference counting is broken.  The whole overload table vanishes without
asking or looking whether it is used anywhere.


The crash is actually quite easy to reproduce (won't take more than 5min):

0. Make sure no network connection interferes with your intended testing
Watching ntpd silently increasing src_node life for 15 minutes is no fun.

1. Create some really simple ruleset:

pass out quick keep state (max-src-conn-rate 1/60, overload potazmo)

You can explicitly name the table before (but inside an anchor you can't).
It just can't be persist, those tables obviously work fine.
// overload table refcnt[RULE] will be 1

2. Create two TCP connections (=cause the overload).

3. Reload the firewall (pfctl -f /etc/pf.conf). // refcnt increases to 2

4. Overload again.

5. Wait for the first src_node to expire (watch pfctl -vvsS)
// refcnt decreases to 1

6. Reload the firewall (you HAVE TO do it BEFORE second src_node expires)
// refcnt increases to 2 and decreases back to 1 as the rules change

7. Wait for the second src_node to expire.
// refcnt decreases to zero! table decides to vanish

8. Overload again.


The first part of this diff doesn't fix this.  The original code just
didn't make any sense.  The second part does. Both look like some pretty
stupid typos...

As this introduces a way to remotely crash a firewall from places
protected by rate limit (if the attacker waits for us to reload ruleset
and then just keeps trying opening conenections) and these parts of code
are quite critical, this probably needs heavier testing than my laptop
and Alix board.

Any comments? Corrections? Input?
Thanks in advance.

--
Martin Pelikan


Index: net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.742
diff -u -p -r1.742 pf.c
--- net/pf.c24 Apr 2011 19:36:54 -  1.742
+++ net/pf.c15 May 2011 23:03:42 -
@@ -593,7 +593,7 @@ pf_remove_src_node(struct pf_src_node *s
if (sn-rule.ptr != NULL) {
sn-rule.ptr-src_nodes--;
if (sn-rule.ptr-states_cur = 0 
-   sn-rule.ptr-max_src_nodes = 0)
+   sn-rule.ptr-src_nodes = 0)
pf_rm_rule(NULL, sn-rule.ptr);
RB_REMOVE(pf_src_tree, tree_src_tracking, sn);
pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
Index: net/pf_ioctl.c
===
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.239
diff -u -p -r1.239 pf_ioctl.c
--- net/pf_ioctl.c

Re: significant speedups

2011-04-20 Thread Martin Pelikan
2011/4/20 Theo de Raadt dera...@cvs.openbsd.org:
 some of you can start cranking the kern.bufcachepercent sysctl to 90.
 try it out. B be impressed, but don't report deadlocks or lockups;
 however real crashes are worth knowing about. B the lockup situations
 are largely understood and will be slowly worked out, which is why
 this is not close to being the default.

I also did that now almost for a year, without a glitch. If anyone has
some more dangerous diffs, I'm happy to test (but I have only 1G of
RAM which makes it pointless for the bigmem changes I guess. Thanks
for that.

 the other change is that interrupt servicing times have gone down
 substantially. B there are some broken drivers we will need to find

I presume it's the diffs regarding return values (intr.c, vector.S and
others). I asked a few people about their opinion of Message Signalled
Interrupts - there's an interesting paper about interrupt routing from
FreeBSD that sort of displays how fucked up this stuff on x86 really
can be. Does this diff solve the problem when PCIe cardx intr() was
triggered the same as ppb's? Or do we need something more?
Since the road from network interrupt handlers to Xsoftintr is quite
long, I guess locking issues will force you to stay to the 1 CPU for
all interrupts model for quite a while, even for hardware like ix(4).
Can you describe the developer's attitude about running kernel stuff
on more CPUs? Lots of hard work, or is there even more to it?

 and fix, so please watch your vmstat -i numbers carefully,

I never quite understood what is the total number useful for. For
how long and how heavily has the system been used? What do you compare
that to, if you use it to make some statement?

Nevertheless, thanks for all the good work!
--
Martin Pelikan



Re: ospf6d crashes on interface add

2011-02-09 Thread Martin Pelikan
On Wed, Jan 26, 2011 at 04:44:30PM +0100, Martin Pelikan wrote:
 Hello tech@, claudio@ in particular,
 this diff corrects the behavior of ospf6d when:
 - vlan/vether/? interface is added with some inet6 address
 - a configured interface is removed (please test with usb devs)
 In both cases the daemon crashes.  Also, it contains various fixes
 
 Desired behavior:
 - remove if - withdraw all its routes, but keep running
 - add previously configured if - announce all its routes back
 - add unconfigured if - get everyone to know about it without crashing

Even in the recent -beta one can crash ospf6d easily, just by typing
# ifconfig vlan7 vlan 7 vlandev your-if
even in the simplest of possible setups.

I run this diff on a production router where vlans are being added
dynamically from admin's web page, without any problem for two weeks.
Is there any chance people won't have to patch tne 4.9-release? :-)
What's the problem? Is it too complicated or have I missed something
again?

Thanks for any feedback.

--
Martin Pelikan



ospf6d crashes on interface add

2011-01-26 Thread Martin Pelikan
Hello tech@, claudio@ in particular,
this diff corrects the behavior of ospf6d when:
- vlan/vether/? interface is added with some inet6 address
- a configured interface is removed (please test with usb devs)
In both cases the daemon crashes.  Also, it contains various fixes
useful for debugging, cleans up and removes a possible memory leak.
The patched version adds all interfaces into iflist in all three
processes, which might be useful in the future when the user reloads
configuration.

Known problems:
- Crazy user types for i = 1 to 1000 do ifconfig vlan$i create and
  then destroy - some interfaces get stuck and if_find says it can't
  find them. I couldn't find out the cause of this. An error prints.
- When you destroy a configured vlan interface, its routes won't
  withdraw. They are marked with DOWN flag, but then the parent sends
  IMSG_NETWORK_ADD, which leaves me with the question about purpose of
  IMSG_NETWORK_DEL. Any ideas? Or an elegant diff?

Desired behavior:
- remove if - withdraw all its routes, but keep running
- add previously configured if - announce all its routes back
- add unconfigured if - get everyone to know about it without crashing

Comments? Corrections? Thanks in advance.
--
Martin Pelikan


Index: area.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/area.c,v
retrieving revision 1.4
diff -u -p -r1.4 area.c
--- area.c  28 Dec 2008 20:08:31 -  1.4
+++ area.c  26 Jan 2011 15:20:38 -
@@ -56,7 +56,6 @@ area_del(struct area *area)
 
/* clean lists */
while ((iface = LIST_FIRST(area-iface_list)) != NULL) {
-   LIST_REMOVE(iface, entry);
if_del(iface);
}
 
Index: interface.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/interface.c,v
retrieving revision 1.15
diff -u -p -r1.15 interface.c
--- interface.c 20 Sep 2009 20:45:06 -  1.15
+++ interface.c 26 Jan 2011 15:20:38 -
@@ -27,6 +27,7 @@
 #include net/if_types.h
 #include ctype.h
 #include err.h
+#include errno.h
 #include stddef.h
 #include stdio.h
 #include stdlib.h
@@ -205,7 +206,7 @@ if_new(u_short ifindex, char *ifname)
struct iface*iface;
 
if ((iface = calloc(1, sizeof(*iface))) == NULL)
-   err(1, if_new: calloc);
+   return (NULL);
 
iface-state = IF_STA_DOWN;
 
@@ -259,7 +260,7 @@ if_del(struct iface *iface)
 {
struct nbr  *nbr = NULL;
 
-   log_debug(if_del: interface %s, iface-name);
+   log_debug(if_del(pid %d): interface %s, getpid(), iface-name);
 
/* revert the demotion when the interface is deleted */
if ((iface-state  (IF_STA_MULTI | IF_STA_POINTTOPOINT)) == 0)
@@ -277,6 +278,11 @@ if_del(struct iface *iface)
evtimer_del(iface-lsack_tx_timer);
 
ls_ack_list_clr(iface);
+
+   /* interfaces from config used to belong to some area */
+   if (iface-cflags  F_IFACE_CONFIGURED)
+   LIST_REMOVE(iface, entry);
+
TAILQ_REMOVE(iflist, iface, list);
free(iface);
 }
@@ -728,8 +734,8 @@ if_join_group(struct iface *iface, struc
mreq.ipv6mr_multiaddr = *addr;
mreq.ipv6mr_interface = iface-ifindex;
 
-   if (setsockopt(iface-fd, IPPROTO_IPV6, IPV6_JOIN_GROUP,
-   mreq, sizeof(mreq))  0) {
+   if (iface-fd  0  setsockopt(iface-fd, IPPROTO_IPV6,
+   IPV6_JOIN_GROUP, mreq, sizeof(mreq))  0) {
log_warn(if_join_group: error IPV6_JOIN_GROUP, 
interface %s address %s, iface-name,
log_in6addr(addr));
@@ -762,12 +768,15 @@ if_leave_group(struct iface *iface, stru
mreq.ipv6mr_multiaddr = *addr;
mreq.ipv6mr_interface = iface-ifindex;
 
-   if (setsockopt(iface-fd, IPPROTO_IPV6, IPV6_LEAVE_GROUP,
-   (void *)mreq, sizeof(mreq))  0) {
-   log_warn(if_leave_group: error IPV6_LEAVE_GROUP, 
-   interface %s address %s, iface-name,
-   log_in6addr(addr));
-   return (-1);
+   if (iface-fd  0  setsockopt(iface-fd, IPPROTO_IPV6,
+   IPV6_LEAVE_GROUP, (void *)mreq, sizeof(mreq))  0) {
+   /* the interface might be already gone */
+   if (errno != ENXIO) {
+   log_warn(if_leave_group: error 
+   IPV6_LEAVE_GROUP, interface %s address %s,
+   iface-name, log_in6addr(addr));
+   return (-1);
+   }
}
break;
case IF_TYPE_POINTOMULTIPOINT:
@@ -790,9 +799,14 @@ if_set_mcast(struct iface *iface)
case IF_TYPE_BROADCAST:
if (setsockopt

Re: ospfd - always check config file permissions

2011-01-13 Thread Martin Pelikan
On Thu, Jan 13, 2011 at 04:02:47PM +0100, Henning Brauer wrote:
 the check is dirt cheap, so that is not the point.
 
 the aforementioned discussion is just being revived ;)

no problem then, here's the new one

--
Martin Pelikan


Index: parse.y
===
RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
retrieving revision 1.258
diff -u -p -r1.258 parse.y
--- parse.y 2 Sep 2010 14:03:21 -   1.258
+++ parse.y 13 Jan 2011 15:11:04 -
@@ -50,9 +50,8 @@ static struct file {
int  lineno;
int  errors;
 } *file, *topfile;
-struct file*pushfile(const char *, int);
+struct file*pushfile(const char *);
 int popfile(void);
-int check_file_secrecy(int, const char *);
 int yyparse(void);
 int yylex(void);
 int yyerror(const char *, ...);
@@ -312,7 +311,7 @@ varset  : STRING '=' string {
 include: INCLUDE STRING{
struct file *nfile;
 
-   if ((nfile = pushfile($2, 1)) == NULL) {
+   if ((nfile = pushfile($2)) == NULL) {
yyerror(failed to include file %s, $2);
free($2);
YYERROR;
@@ -2471,28 +2470,8 @@ nodigits:
return (c);
 }
 
-int
-check_file_secrecy(int fd, const char *fname)
-{
-   struct stat st;
-
-   if (fstat(fd, st)) {
-   log_warn(cannot stat %s, fname);
-   return (-1);
-   }
-   if (st.st_uid != 0  st.st_uid != getuid()) {
-   log_warnx(%s: owner not root or current user, fname);
-   return (-1);
-   }
-   if (st.st_mode  (S_IRWXG | S_IRWXO)) {
-   log_warnx(%s: group/world readable/writeable, fname);
-   return (-1);
-   }
-   return (0);
-}
-
 struct file *
-pushfile(const char *name, int secret)
+pushfile(const char *name)
 {
struct file *nfile;
 
@@ -2511,13 +2490,6 @@ pushfile(const char *name, int secret)
free(nfile);
return (NULL);
}
-   if (secret 
-   check_file_secrecy(fileno(nfile-stream), nfile-name)) {
-   fclose(nfile-stream);
-   free(nfile-name);
-   free(nfile);
-   return (NULL);
-   }
nfile-lineno = 1;
TAILQ_INSERT_TAIL(files, nfile, entry);
return (nfile);
@@ -2558,7 +2530,7 @@ parse_config(char *filename, struct bgpd
conf-opts = xconf-opts;
conf-csock = strdup(SOCKET_NAME);
 
-   if ((file = pushfile(filename, 1)) == NULL) {
+   if ((file = pushfile(filename)) == NULL) {
free(conf);
return (-1);
}
Index: parse.y
===
RCS file: /cvs/src/usr.sbin/ospfd/parse.y,v
retrieving revision 1.73
diff -u -p -r1.73 parse.y
--- parse.y 13 Dec 2010 13:43:37 -  1.73
+++ parse.y 13 Jan 2011 15:12:02 -
@@ -50,9 +50,8 @@ static struct file {
int  lineno;
int  errors;
 } *file, *topfile;
-struct file*pushfile(const char *, int);
+struct file*pushfile(const char *);
 int popfile(void);
-int check_file_secrecy(int, const char *);
 int yyparse(void);
 int yylex(void);
 int yyerror(const char *, ...);
@@ -149,7 +148,7 @@ grammar : /* empty */
 include: INCLUDE STRING{
struct file *nfile;
 
-   if ((nfile = pushfile($2, 1)) == NULL) {
+   if ((nfile = pushfile($2)) == NULL) {
yyerror(failed to include file %s, $2);
free($2);
YYERROR;
@@ -999,28 +998,8 @@ nodigits:
return (c);
 }
 
-int
-check_file_secrecy(int fd, const char *fname)
-{
-   struct stat st;
-
-   if (fstat(fd, st)) {
-   log_warn(cannot stat %s, fname);
-   return (-1);
-   }
-   if (st.st_uid != 0  st.st_uid != getuid()) {
-   log_warnx(%s: owner not root or current user, fname);
-   return (-1);
-   }
-   if (st.st_mode  (S_IRWXG | S_IRWXO)) {
-   log_warnx(%s: group/world readable/writeable, fname);
-   return (-1);
-   }
-   return (0);
-}
-
 struct file *
-pushfile(const char *name, int secret)
+pushfile(const char *name)
 {
struct file *nfile;
 
@@ -1038,12 +1017,6 @@ pushfile(const char *name, int secret)
free(nfile-name);
free(nfile);
return (NULL);
-   } else if (secret 
-   check_file_secrecy(fileno(nfile-stream), nfile-name

ospfd - always check config file permissions

2011-01-12 Thread Martin Pelikan
Hello,
this patch makes ospfd(8) and ospf6d(8) check its config file permissions
even if run with a -n to test it. bgpd already behaves this way (changed
6 years ago by henning@) and it's quite handy to fix the permissions while
doing tests, rather than at the first production boot time :-)
Any comments?
--
Martin Pelikan


Index: parse.y
===
RCS file: /cvs/src/usr.sbin/ospf6d/parse.y,v
retrieving revision 1.20
diff -u -p -r1.20 parse.y
--- parse.y 13 Dec 2010 13:43:37 -  1.20
+++ parse.y 12 Jan 2011 22:23:36 -
@@ -887,7 +887,7 @@ parse_config(char *filename, int opts)
conf-spf_hold_time = DEFAULT_SPF_HOLDTIME;
conf-spf_state = SPF_IDLE;
 
-   if ((file = pushfile(filename, !(conf-opts  OSPFD_OPT_NOACTION))) == 
NULL) {
+   if ((file = pushfile(filename, 1)) == NULL) {
free(conf);
return (NULL);
}
Index: parse.y
===
RCS file: /cvs/src/usr.sbin/ospfd/parse.y,v
retrieving revision 1.73
diff -u -p -r1.73 parse.y
--- parse.y 13 Dec 2010 13:43:37 -  1.73
+++ parse.y 12 Jan 2011 22:23:51 -
@@ -1092,7 +1092,7 @@ parse_config(char *filename, int opts)
conf-spf_hold_time = DEFAULT_SPF_HOLDTIME;
conf-spf_state = SPF_IDLE;
 
-   if ((file = pushfile(filename, !(conf-opts  OSPFD_OPT_NOACTION))) == 
NULL) {
+   if ((file = pushfile(filename, 1)) == NULL) {
free(conf);
return (NULL);
}



Re: ospfd - always check config file permissions

2011-01-12 Thread Martin Pelikan
On Thu, Jan 13, 2011 at 12:13:14AM +0100, Claudio Jeker wrote:
 Appart from my desire to kill the permission checking?
 I don't see why bgpd and ospfd needs this non-unix like behaviour, 
 other tools like pfctl do not care. We install the file with the correct
 permissions so if somebody changes them it is his fault. But this is just
 my opinion. 

Personally, I don't really care about the check. I saw bgpd notify me,
so I fixed that and then both ospfd's told me they were OK. Obviously
the errors followed next boot.
I'd just want the behavior to be consistent and the information
configuration OK to keep its meaning up. Because I found in CVS log
that Henning and Theo were discussing that earlier, I did it this way.
After all, it doesn't look like an expensive check, or is it?

--
Martin Pelikan



Re: PF misbehaving counters + code reuse

2010-09-10 Thread Martin Pelikan
Hello tech@,
please test this; I sent it to some folks but it looks like they were busy...


On Thu, Sep 02, 2010 at 06:07:57PM +0200, Henning Brauer wrote:
 this diff I like. get 2 or 3 ppl to test and I'll commit it :)


--
Martin Pelikan


Index: net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.696
diff -u -p -r1.696 pf.c
--- net/pf.c5 Aug 2010 17:21:19 -   1.696
+++ net/pf.c1 Sep 2010 22:04:03 -
@@ -231,6 +231,10 @@ struct pf_state*pf_find_state(struct p
 int pf_src_connlimit(struct pf_state **);
 int pf_check_congestion(struct ifqueue *);
 int pf_match_rcvif(struct mbuf *, struct pf_rule *);
+voidpf_counters_inc(int, int,
+   struct pf_pdesc *, struct pfi_kif *,
+   struct pf_state *, struct pf_rule *,
+   struct pf_rule *);
 
 extern struct pool pfr_ktable_pl;
 extern struct pool pfr_kentry_pl;
@@ -5517,6 +5521,64 @@ pf_get_divert(struct mbuf *m)
return ((struct pf_divert *)(mtag + 1));
 }
 
+void
+pf_counters_inc(int dir, int action, struct pf_pdesc *pd,
+struct pfi_kif *kif, struct pf_state *s,
+struct pf_rule *r, struct pf_rule *a)
+{ 
+   int dirndx;
+   kif-pfik_bytes[pd-af == AF_INET6][dir == PF_OUT][action != PF_PASS]
+   += pd-tot_len;
+   kif-pfik_packets[pd-af == AF_INET6][dir == PF_OUT][action != 
PF_PASS]++;
+
+   if (action == PF_PASS || r-action == PF_DROP) {
+   dirndx = (dir == PF_OUT);
+   r-packets[dirndx]++;
+   r-bytes[dirndx] += pd-tot_len;
+   if (a != NULL) {
+   a-packets[dirndx]++;
+   a-bytes[dirndx] += pd-tot_len;
+   }
+   if (s != NULL) {
+   struct pf_rule_item *ri;
+   struct pf_sn_item   *sni;
+
+   SLIST_FOREACH(sni, s-src_nodes, next) {
+   sni-sn-packets[dirndx]++;
+   sni-sn-bytes[dirndx] += pd-tot_len;
+   }
+   dirndx = (dir == s-direction) ? 0 : 1;
+   s-packets[dirndx]++;
+   s-bytes[dirndx] += pd-tot_len;
+
+   /*
+* We want to increase counters on _all_ rules
+* that were matched during processing. 
+*  XXX This does NOT affect pass rules!
+*  XXX Change this in pf_test_rule()?
+*/
+   SLIST_FOREACH(ri, s-match_rules, entry) {
+   ri-r-packets[dirndx]++;
+   ri-r-bytes[dirndx] += pd-tot_len;
+   }
+   }
+   if (r-src.addr.type == PF_ADDR_TABLE)
+   pfr_update_stats(r-src.addr.p.tbl,
+   (s == NULL) ? pd-src :
+   s-key[(s-direction == PF_IN)]-
+   addr[(s-direction == PF_OUT)],
+   pd-af, pd-tot_len, dir == PF_OUT,
+   r-action == PF_PASS, r-src.neg);
+   if (r-dst.addr.type == PF_ADDR_TABLE)
+   pfr_update_stats(r-dst.addr.p.tbl,
+   (s == NULL) ? pd-dst :
+   s-key[(s-direction == PF_IN)]-
+   addr[(s-direction == PF_IN)],
+   pd-af, pd-tot_len, dir == PF_OUT,
+   r-action == PF_PASS, r-dst.neg);
+   }
+}
+
 #ifdef INET
 int
 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
@@ -5530,8 +5592,8 @@ pf_test(int dir, struct ifnet *ifp, stru
struct pf_state *s = NULL;
struct pf_ruleset   *ruleset = NULL;
struct pf_pdesc  pd;
-   int  off, dirndx, pqid = 0;
-   u_int16_tqid;
+   int  off;
+   u_int32_tqid, pqid = 0;
 
if (!pf_status.running)
return (PF_PASS);
@@ -5812,48 +5872,7 @@ done:
}
}
 
-   kif-pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
-   kif-pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
-
-   if (action == PF_PASS || r-action == PF_DROP) {
-   dirndx = (dir == PF_OUT);
-   r-packets[dirndx]++;
-   r-bytes[dirndx] += pd.tot_len;
-   if (a != NULL) {
-   a-packets[dirndx]++;
-   a-bytes[dirndx] += pd.tot_len;
-   }
-   if (s != NULL) {
-   struct pf_rule_item *ri;
-   struct pf_sn_item   *sni

Re: PF misbehaving counters + code reuse

2010-09-01 Thread Martin Pelikan
Hello,
this is a new version based on corrections by henning@ with few more
things added.
I'm not sure whether the type changes in pfvar.h won't break anything,
but it would be nice to have the types uniform.
Any comments or suggestions?

--
Martin Pelikan


Index: net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.696
diff -u -p -r1.696 pf.c
--- net/pf.c5 Aug 2010 17:21:19 -   1.696
+++ net/pf.c1 Sep 2010 22:04:03 -
@@ -231,6 +231,10 @@ struct pf_state*pf_find_state(struct p
 int pf_src_connlimit(struct pf_state **);
 int pf_check_congestion(struct ifqueue *);
 int pf_match_rcvif(struct mbuf *, struct pf_rule *);
+voidpf_inc_counters(int, int,
+   struct pf_pdesc *, struct pfi_kif *,
+   struct pf_state *, struct pf_rule *,
+   struct pf_rule *);
 
 extern struct pool pfr_ktable_pl;
 extern struct pool pfr_kentry_pl;
@@ -3413,10 +3417,10 @@ pf_test_fragment(struct pf_rule **rm, in
else if (r-proto  r-proto != pd-proto)
r = r-skip[PF_SKIP_PROTO].ptr;
else if (PF_MISMATCHAW(r-src.addr, pd-src, af,
-   r-src.neg, kif, /* XXX rdomain */ 0))
+   r-src.neg, kif, r-rtableid))
r = r-skip[PF_SKIP_SRC_ADDR].ptr;
else if (PF_MISMATCHAW(r-dst.addr, pd-dst, af,
-   r-dst.neg, NULL, /* XXX rdomain */ 0))
+   r-dst.neg, NULL, r-rtableid))
r = r-skip[PF_SKIP_DST_ADDR].ptr;
else if (r-tos  !(r-tos == pd-tos))
r = TAILQ_NEXT(r, entries);
@@ -5517,6 +5521,64 @@ pf_get_divert(struct mbuf *m)
return ((struct pf_divert *)(mtag + 1));
 }
 
+void
+pf_inc_counters(int dir, int action, struct pf_pdesc *pd,
+struct pfi_kif *kif, struct pf_state *s,
+struct pf_rule *r, struct pf_rule *a)
+{ 
+   int dirndx;
+   kif-pfik_bytes[pd-af == AF_INET6][dir == PF_OUT][action != PF_PASS]
+   += pd-tot_len;
+   kif-pfik_packets[pd-af == AF_INET6][dir == PF_OUT][action != 
PF_PASS]++;
+
+   if (action == PF_PASS || r-action == PF_DROP) {
+   dirndx = (dir == PF_OUT);
+   r-packets[dirndx]++;
+   r-bytes[dirndx] += pd-tot_len;
+   if (a != NULL) {
+   a-packets[dirndx]++;
+   a-bytes[dirndx] += pd-tot_len;
+   }
+   if (s != NULL) {
+   struct pf_rule_item *ri;
+   struct pf_sn_item   *sni;
+
+   SLIST_FOREACH(sni, s-src_nodes, next) {
+   sni-sn-packets[dirndx]++;
+   sni-sn-bytes[dirndx] += pd-tot_len;
+   }
+   dirndx = (dir == s-direction) ? 0 : 1;
+   s-packets[dirndx]++;
+   s-bytes[dirndx] += pd-tot_len;
+
+   /*
+* We want to increase counters on _all_ rules
+* that were matched during processing. 
+*  XXX This does NOT affect pass rules!
+*  XXX Change this in pf_test_rule()?
+*/
+   SLIST_FOREACH(ri, s-match_rules, entry) {
+   ri-r-packets[dirndx]++;
+   ri-r-bytes[dirndx] += pd-tot_len;
+   }
+   }
+   if (r-src.addr.type == PF_ADDR_TABLE)
+   pfr_update_stats(r-src.addr.p.tbl,
+   (s == NULL) ? pd-src :
+   s-key[(s-direction == PF_IN)]-
+   addr[(s-direction == PF_OUT)],
+   pd-af, pd-tot_len, dir == PF_OUT,
+   r-action == PF_PASS, r-src.neg);
+   if (r-dst.addr.type == PF_ADDR_TABLE)
+   pfr_update_stats(r-dst.addr.p.tbl,
+   (s == NULL) ? pd-dst :
+   s-key[(s-direction == PF_IN)]-
+   addr[(s-direction == PF_IN)],
+   pd-af, pd-tot_len, dir == PF_OUT,
+   r-action == PF_PASS, r-dst.neg);
+   }
+}
+
 #ifdef INET
 int
 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
@@ -5530,8 +5592,8 @@ pf_test(int dir, struct ifnet *ifp, stru
struct pf_state *s = NULL;
struct pf_ruleset   *ruleset = NULL;
struct pf_pdesc  pd;
-   int  off, dirndx, pqid = 0;
-   u_int16_tqid;
+   int  off;
+   u_int32_tqid, pqid = 0