sparc64: delete 32bit process support from syscall()
Simplify syscall(): - if the trapframe is of a 32bit process, just call sigexit() instead of returning an error - delete other code for 32bit processes - 64bit processes only, so SYS__syscall can be handled the same as SYS_syscall - delete a superfluous cast builds and runs fine ok? Philip Index: trap.c === RCS file: /data/src/openbsd/src/sys/arch/sparc64/sparc64/trap.c,v retrieving revision 1.88 diff -u -p -r1.88 trap.c --- trap.c 27 Feb 2016 13:08:07 - 1.88 +++ trap.c 10 Sep 2016 03:27:37 - @@ -1222,6 +1222,9 @@ syscall(tf, code, pc) register_t args[8]; register_t rval[2]; + if ((tf->tf_out[6] & 1) == 0) + sigexit(p, SIGILL); + uvmexp.syscalls++; p = curproc; #ifdef DIAGNOSTIC @@ -1255,29 +1258,15 @@ syscall(tf, code, pc) switch (code) { case SYS_syscall: + case SYS___syscall: code = *ap++; nap--; break; - case SYS___syscall: - if (code < nsys && callp[code].sy_call != - callp[p->p_p->ps_emul->e_nosys].sy_call) - break; /* valid system call */ - if (tf->tf_out[6] & 1L) { - /* longs *are* quadwords */ - code = ap[0]; - ap += 1; - nap -= 1; - } else { - code = ap[_QUAD_LOWWORD]; - ap += 2; - nap -= 2; - } - break; } if (code < 0 || code >= nsys) callp += p->p_p->ps_emul->e_nosys; - else if (tf->tf_out[6] & 1L) { + else { register_t *argp; callp += code; @@ -1286,7 +1275,7 @@ syscall(tf, code, pc) if (i > 8) panic("syscall nargs"); /* Read the whole block in */ - if ((error = copyin((caddr_t)(u_long)tf->tf_out[6] + if ((error = copyin((caddr_t)tf->tf_out[6] + BIAS + offsetof(struct frame64, fr_argx), [nap], (i - nap) * sizeof(register_t goto bad; @@ -1298,9 +1287,6 @@ syscall(tf, code, pc) */ for (argp = args; i--;) *argp++ = *ap++; - } else { - error = EFAULT; - goto bad; } rval[0] = 0;
Re: sparc64: clean up db_trace.c
On Fri, 9 Sep 2016, Philip Guenther wrote: > Noticed while looking at Jasper's diff. > - convert declarations from k to standard C > - delete support for 32bit frame backtracing. I doubt this code has ever >been executed on OpenBSD. If a 32bit frame is encountered ((sp&1)==0) >then print a warning and stop processing the frames > - delete a pile of casts that are unnecessary > - minor whitespace tweaks > > build tested Testing "tr", "mach stack", "mach window", and "mach tf" showed one reversed test. Updated diff below. ok? Index: db_trace.c === RCS file: /data/src/openbsd/src/sys/arch/sparc64/sparc64/db_trace.c,v retrieving revision 1.10 diff -u -p -r1.10 db_trace.c --- db_trace.c 9 Feb 2015 09:21:30 - 1.10 +++ db_trace.c 10 Sep 2016 03:36:20 - @@ -56,12 +56,8 @@ void db_print_window(u_int64_t); #define ULOAD(x) probeget((paddr_t)(u_long)&(x), ASI_AIUS, sizeof(x)) void -db_stack_trace_print(addr, have_addr, count, modif, pr) - db_expr_t addr; - int have_addr; - db_expr_t count; - char*modif; - int (*pr)(const char *, ...); +db_stack_trace_print(db_expr_t addr, int have_addr, db_expr_t count, +char *modif, int (*pr)(const char *, ...)) { vaddr_t frame; boolean_t kernel_only = TRUE; @@ -95,28 +91,26 @@ db_stack_trace_print(addr, have_addr, co } } + if ((frame & 1) == 0) { + db_printf("WARNING: corrupt frame at %lx\n", frame); + return; + } + while (count--) { int i; db_expr_t offset; char*name; db_addr_t pc; struct frame64 *f64; - struct frame32 *f32; /* * Switch to frame that contains arguments */ - if (frame & 1) { - f64 = (struct frame64 *)(frame + BIAS); - pc = (db_addr_t)KLOAD(f64->fr_pc); - - frame = KLOAD(f64->fr_fp); - } else { - f32 = (struct frame32 *)(frame); - pc = (db_addr_t)KLOAD(f32->fr_pc); - - frame = (long)KLOAD(f32->fr_fp); - } + + f64 = (struct frame64 *)(frame + BIAS); + pc = (db_addr_t)KLOAD(f64->fr_pc); + + frame = KLOAD(f64->fr_fp); if (kernel_only) { if (pc < KERNBASE || pc >= KERNEND) @@ -137,22 +131,20 @@ db_stack_trace_print(addr, have_addr, co name = "?"; (*pr)("%s(", name); + + if ((frame & 1) == 0) { + db_printf(")\nWARNING: corrupt frame at %lx\n", frame); + break; + } /* * Print %i0..%i5; hope these still reflect the * actual arguments somewhat... */ - if (frame & 1) { - f64 = (struct frame64 *)(frame + BIAS); - for (i = 0; i < 5; i++) - (*pr)("%lx, ", (long)KLOAD(f64->fr_arg[i])); - (*pr)("%lx) at ", (long)KLOAD(f64->fr_arg[i])); - } else { - f32 = (struct frame32 *)(frame); - for (i = 0; i < 5; i++) - (*pr)("%x, ", (u_int)KLOAD(f32->fr_arg[i])); - (*pr)("%x) at ", (u_int)KLOAD(f32->fr_arg[i])); - } + f64 = (struct frame64 *)(frame + BIAS); + for (i = 0; i < 5; i++) + (*pr)("%lx, ", (long)KLOAD(f64->fr_arg[i])); + (*pr)("%lx) at ", (long)KLOAD(f64->fr_arg[i])); db_printsym(pc, DB_STGY_PROC, pr); (*pr)("\n"); } @@ -160,11 +152,7 @@ db_stack_trace_print(addr, have_addr, co void -db_dump_window(addr, have_addr, count, modif) - db_expr_t addr; - int have_addr; - db_expr_t count; - char *modif; +db_dump_window(db_expr_t addr, int have_addr, db_expr_t count, char *modif) { int i; u_int64_t frame = DDB_TF->tf_out[6]; @@ -174,10 +162,15 @@ db_dump_window(addr, have_addr, count, m addr = 0; /* Traverse window stack */ - for (i=0; ifr_fp; - else frame = (u_int64_t)((struct frame32 *)(u_long)frame)->fr_fp; + for (i = 0; i < addr && frame; i++) { + if ((frame & 1) == 0) + break; + frame = ((struct frame64 *)(frame + BIAS))->fr_fp; + } + + if ((frame & 1) == 0) { + db_printf("WARNING: corrupt frame at
sparc64: clean up db_trace.c
Noticed while looking at Jasper's diff. - convert declarations from k to standard C - delete support for 32bit frame backtracing. I doubt this code has ever been executed on OpenBSD. If a 32bit frame is encountered ((sp&1)==0) then print a warning and stop processing the frames - delete a pile of casts that are unnecessary - minor whitespace tweaks build tested ok? Philip Index: db_trace.c === RCS file: /data/src/openbsd/src/sys/arch/sparc64/sparc64/db_trace.c,v retrieving revision 1.10 diff -u -p -r1.10 db_trace.c --- db_trace.c 9 Feb 2015 09:21:30 - 1.10 +++ db_trace.c 10 Sep 2016 03:07:37 - @@ -56,12 +56,8 @@ void db_print_window(u_int64_t); #define ULOAD(x) probeget((paddr_t)(u_long)&(x), ASI_AIUS, sizeof(x)) void -db_stack_trace_print(addr, have_addr, count, modif, pr) - db_expr_t addr; - int have_addr; - db_expr_t count; - char*modif; - int (*pr)(const char *, ...); +db_stack_trace_print(db_expr_t addr, int have_addr, db_expr_t count, +char *modif, int (*pr)(const char *, ...)) { vaddr_t frame; boolean_t kernel_only = TRUE; @@ -95,28 +91,26 @@ db_stack_trace_print(addr, have_addr, co } } + if ((frame & 1) == 0) { + db_printf("WARNING: corrupt frame at %lx\n", frame); + return; + } + while (count--) { int i; db_expr_t offset; char*name; db_addr_t pc; struct frame64 *f64; - struct frame32 *f32; /* * Switch to frame that contains arguments */ - if (frame & 1) { - f64 = (struct frame64 *)(frame + BIAS); - pc = (db_addr_t)KLOAD(f64->fr_pc); - - frame = KLOAD(f64->fr_fp); - } else { - f32 = (struct frame32 *)(frame); - pc = (db_addr_t)KLOAD(f32->fr_pc); - - frame = (long)KLOAD(f32->fr_fp); - } + + f64 = (struct frame64 *)(frame + BIAS); + pc = (db_addr_t)KLOAD(f64->fr_pc); + + frame = KLOAD(f64->fr_fp); if (kernel_only) { if (pc < KERNBASE || pc >= KERNEND) @@ -137,22 +131,20 @@ db_stack_trace_print(addr, have_addr, co name = "?"; (*pr)("%s(", name); + + if ((frame & 1) == 0) { + db_printf(")\nWARNING: corrupt frame at %lx\n", frame); + break; + } /* * Print %i0..%i5; hope these still reflect the * actual arguments somewhat... */ - if (frame & 1) { - f64 = (struct frame64 *)(frame + BIAS); - for (i = 0; i < 5; i++) - (*pr)("%lx, ", (long)KLOAD(f64->fr_arg[i])); - (*pr)("%lx) at ", (long)KLOAD(f64->fr_arg[i])); - } else { - f32 = (struct frame32 *)(frame); - for (i = 0; i < 5; i++) - (*pr)("%x, ", (u_int)KLOAD(f32->fr_arg[i])); - (*pr)("%x) at ", (u_int)KLOAD(f32->fr_arg[i])); - } + f64 = (struct frame64 *)(frame + BIAS); + for (i = 0; i < 5; i++) + (*pr)("%lx, ", (long)KLOAD(f64->fr_arg[i])); + (*pr)("%lx) at ", (long)KLOAD(f64->fr_arg[i])); db_printsym(pc, DB_STGY_PROC, pr); (*pr)("\n"); } @@ -160,11 +152,7 @@ db_stack_trace_print(addr, have_addr, co void -db_dump_window(addr, have_addr, count, modif) - db_expr_t addr; - int have_addr; - db_expr_t count; - char *modif; +db_dump_window(db_expr_t addr, int have_addr, db_expr_t count, char *modif) { int i; u_int64_t frame = DDB_TF->tf_out[6]; @@ -174,10 +162,15 @@ db_dump_window(addr, have_addr, count, m addr = 0; /* Traverse window stack */ - for (i=0; ifr_fp; - else frame = (u_int64_t)((struct frame32 *)(u_long)frame)->fr_fp; + for (i = 0; i < addr && frame; i++) { + if ((frame & 1) == 0) + break; + frame = ((struct frame64 *)(frame + BIAS))->fr_fp; + } + + if (frame & 1) { + db_printf("WARNING: corrupt frame at %llx\n", frame); + return; } db_printf("Window %lx ", addr); @@ -185,96 +178,67 @@ db_dump_window(addr, have_addr, count, m } void
Re: db_trace.c: use __func__ instead of hardcoding filename
On Fri, Sep 9, 2016 at 3:19 PM, Jasper Lievisse Adriaansewrote: > On Fri, Sep 09, 2016 at 01:54:46PM -0700, Philip Guenther wrote: >> On Fri, Sep 9, 2016 at 11:38 AM, Jasper Lievisse Adriaanse >> wrote: >> > Do we really want the filename to be printed in the message? If so we >> > should >> > use __FILE__. On the other hand, having the function name makes more sense >> > to >> > me. >> >> This is a "not found" message in response to an explicit user command: >> why does the user care what *source file* is generating that message? > I have no idea why the user would care about the source file. However the > function triggering it might be of some more information, albeit of dubious > added value. Perhaps the message should be normalized across the board > following arm/sparc64? In a word: YES.
Re: syslogd validate client certificates
On Mon, Sep 05, 2016 at 03:25:22AM +0200, Alexander Bluhm wrote: > Hi, > > Add an option to give syslogd a server CA that is used to validate > client certificates. This prevent that malicious clients can send > fake messages. > > ok? Anyone? > > bluhm > > Index: usr.sbin/syslogd/syslogd.8 > === > RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.8,v > retrieving revision 1.42 > diff -u -p -r1.42 syslogd.8 > --- usr.sbin/syslogd/syslogd.812 Jul 2016 23:04:30 - 1.42 > +++ usr.sbin/syslogd/syslogd.85 Sep 2016 01:18:00 - > @@ -44,6 +44,7 @@ > .Op Fl C Ar CAfile > .Op Fl c Ar cert_file > .Op Fl f Ar config_file > +.Op Fl K Ar server_CAfile > .Op Fl k Ar key_file > .Op Fl m Ar mark_interval > .Op Fl p Ar log_socket > @@ -83,6 +84,11 @@ PEM encoded file containing CA certifica > validation; > the default is > .Pa /etc/ssl/cert.pem . > +Validate remote server certificates and their hostnames with this > +CA to prevent that malicious servers read messages. > +This validation can be explicitly turned off using the > +.Fl V > +switch. > .It Fl c Ar cert_file > PEM encoded file containing the client certificate for TLS connections > to a remote host. > @@ -102,6 +108,12 @@ the default is > .Pa /etc/syslog.conf . > .It Fl h > Include the hostname when forwarding messages to a remote host. > +.It Fl K Ar server_CAfile > +PEM encoded file containing CA certificates used for certificate > +valitation on the local server socket. > +By default incomming connections from any TLS server are allowed. > +Enforce client certificates and validate them with this CA to prevent > +that malicious clients send fake messages. > .It Fl k Ar key_file > PEM encoded file containing the client private key for TLS connections > to a remote host. > @@ -170,7 +182,8 @@ accept input from the UDP port. > Some software wants this, but you can be subjected to a variety of > attacks over the network, including attackers remotely filling logs. > .It Fl V > -Do not perform server certificate and hostname validation. > +Do not perform remote server certificate and hostname validation > +when sending messages. > .El > .Pp > .Nm > Index: usr.sbin/syslogd/syslogd.c > === > RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v > retrieving revision 1.212 > diff -u -p -r1.212 syslogd.c > --- usr.sbin/syslogd/syslogd.c29 Aug 2016 20:31:56 - 1.212 > +++ usr.sbin/syslogd/syslogd.c5 Sep 2016 01:20:06 - > @@ -225,8 +225,9 @@ structtls *server_ctx; > struct tls_config *client_config, *server_config; > const char *CAfile = "/etc/ssl/cert.pem"; /* file containing CA certificates > */ > int NoVerify = 0; /* do not verify TLS server x509 certificate */ > -char *ClientCertfile = NULL; > -char *ClientKeyfile = NULL; > +const char *ClientCertfile = NULL; > +const char *ClientKeyfile = NULL; > +const char *ServerCAfile = NULL; > int tcpbuf_dropped = 0; /* count messages dropped from TCP or TLS */ > > #define CTL_READING_CMD 1 > @@ -356,7 +357,7 @@ main(int argc, char *argv[]) > int ch, i; > int lockpipe[2] = { -1, -1}, pair[2], nullfd, fd; > > - while ((ch = getopt(argc, argv, "46a:C:c:dFf:hk:m:np:S:s:T:U:uV")) > + while ((ch = getopt(argc, argv, "46a:C:c:dFf:hK:k:m:np:S:s:T:U:uV")) > != -1) > switch (ch) { > case '4': /* disable IPv6 */ > @@ -388,6 +389,9 @@ main(int argc, char *argv[]) > case 'h': /* RFC 3164 hostnames */ > IncludeHostname = 1; > break; > + case 'K': /* verify client with CA file */ > + ServerCAfile = optarg; > + break; > case 'k': /* file containing client key */ > ClientKeyfile = optarg; > break; > @@ -625,6 +629,17 @@ main(int argc, char *argv[]) > break; > } > > + if (ServerCAfile) { > + if (tls_config_set_ca_file(server_config, > + ServerCAfile) == -1) { > + logerrortlsconf("Load server TLS CA failed", > + server_config); > + /* avoid reading default certs in chroot */ > + tls_config_set_ca_mem(server_config, "", 0); > + } else > + logdebug("Server CAfile %s\n", ServerCAfile); > + tls_config_verify_client(server_config); > + } > tls_config_set_protocols(server_config, TLS_PROTOCOLS_ALL); > if (tls_config_set_ciphers(server_config, "compat")
Re: db_trace.c: use __func__ instead of hardcoding filename
On Fri, Sep 09, 2016 at 01:54:46PM -0700, Philip Guenther wrote: > On Fri, Sep 9, 2016 at 11:38 AM, Jasper Lievisse Adriaanse >wrote: > > Do we really want the filename to be printed in the message? If so we should > > use __FILE__. On the other hand, having the function name makes more sense > > to > > me. > > This is a "not found" message in response to an explicit user command: > why does the user care what *source file* is generating that message? I have no idea why the user would care about the source file. However the function triggering it might be of some more information, albeit of dubious added value. Perhaps the message should be normalized across the board following arm/sparc64? > arm and sparc64 do the Right Thing IMO: > if (p == NULL) { > (*pr)("not found\n"); > return; > } > > Philip Guenther -- jasper
Re: reduce double caching in mfs
I really dislike "CHEAP". and it almost seems like these should actually be NOCACHE.. why the heck can't they be? On Thu, Sep 8, 2016 at 7:49 PM, Ted Unangstwrote: > Currently, the bufcache doesn't know that mfs is backed by memory. All i/o > to > mfs ends up being double cached, once in the userland process and again in > the > kernel bufcache. This is wasteful. In particular, it means one can't use > mfs > to increase the effective size of the buffer cache. Reading or writing to > mfs > will push out buffers used for disk caching. (I think you can even end up > with > triple buffering when mfs starts swapping...) > > This is mostly inherent to the design of mfs. But with a small tweak to the > buffer cache, we can improve the situation. This introduces the concept of > 'cheap' buffers, a hint to the buffer cache that they are easily replaced. > (There's a 'nocache' flag already, but it's not suitable here.) When mfs > finishes with a buf, it marks it cheap. Then it goes onto a special queue > that > gets chewed up before we start looking at the regular caches. We still > cache > some number of cheap buffers to prevent constant memory copying. > > With this diff, I've confirmed that reading/writing large files to mfs > doesn't > flush the cache, but performance appears about the same. (Of particular > note, > my bufcache is big enough to cache all of src/, but not src/ and obj/. With > obj/ on mfs, src never gets flushed.) > > > Index: kern/vfs_bio.c > === > RCS file: /cvs/src/sys/kern/vfs_bio.c,v > retrieving revision 1.176 > diff -u -p -r1.176 vfs_bio.c > --- kern/vfs_bio.c 4 Sep 2016 10:51:24 - 1.176 > +++ kern/vfs_bio.c 8 Sep 2016 18:31:52 - > @@ -93,7 +93,10 @@ int bd_req; /* Sleep point for cleaner > > #define NUM_CACHES 2 > #define DMA_CACHE 0 > +#define CHEAP_LIMIT 256 > struct bufcache cleancache[NUM_CACHES]; > +struct bufqueue cheapqueue; > +u_int cheapqueuelen; > struct bufqueue dirtyqueue; > > void > @@ -1297,6 +1300,7 @@ bufcache_init(void) > TAILQ_INIT([i].coldqueue); > TAILQ_INIT([i].warmqueue); > } > + TAILQ_INIT(); > TAILQ_INIT(); > } > > @@ -1329,6 +1333,12 @@ bufcache_getcleanbuf(int cachenum, int d > > splassert(IPL_BIO); > > + /* try cheap queue if over limit */ > + if (discard || cheapqueuelen > CHEAP_LIMIT) { > + if ((bp = TAILQ_FIRST())) > + return bp; > + } > + > /* try cold queue */ > while ((bp = TAILQ_FIRST(>coldqueue))) { > if ((!discard) && > @@ -1356,6 +1366,8 @@ bufcache_getcleanbuf(int cachenum, int d > /* buffer is cold - give it up */ > return bp; > } > + if ((bp = TAILQ_FIRST())) > + return bp; > if ((bp = TAILQ_FIRST(>warmqueue))) > return bp; > if ((bp = TAILQ_FIRST(>hotqueue))) > @@ -1410,6 +1422,13 @@ bufcache_take(struct buf *bp) > pages = atop(bp->b_bufsize); > struct bufcache *cache = [bp->cache]; > if (!ISSET(bp->b_flags, B_DELWRI)) { > + if (ISSET(bp->b_flags, B_CHEAP)) { > + TAILQ_REMOVE(, bp, b_freelist); > + cheapqueuelen--; > + CLR(bp->b_flags, B_CHEAP); > + return; > + } > + > if (ISSET(bp->b_flags, B_COLD)) { > queue = >coldqueue; > } else if (ISSET(bp->b_flags, B_WARM)) { > @@ -1462,11 +1481,17 @@ bufcache_release(struct buf *bp) > struct bufqueue *queue; > int64_t pages; > struct bufcache *cache = [bp->cache]; > + > pages = atop(bp->b_bufsize); > KASSERT(ISSET(bp->b_flags, B_BC)); > KASSERT((ISSET(bp->b_flags, B_DMA) && bp->cache == 0) > || ((!ISSET(bp->b_flags, B_DMA)) && bp->cache > 0)); > if (!ISSET(bp->b_flags, B_DELWRI)) { > + if (ISSET(bp->b_flags, B_CHEAP)) { > + TAILQ_INSERT_TAIL(, bp, b_freelist); > + cheapqueuelen++; > + return; > + } > int64_t *queuepages; > if (ISSET(bp->b_flags, B_WARM | B_COLD)) { > SET(bp->b_flags, B_WARM); > Index: sys/buf.h > === > RCS file: /cvs/src/sys/sys/buf.h,v > retrieving revision 1.103 > diff -u -p -r1.103 buf.h > --- sys/buf.h 23 May 2016 09:31:28 - 1.103 > +++ sys/buf.h 8 Sep 2016 17:20:12 - > @@ -221,12 +221,14 @@ struct bufcache { > #defineB_COLD 0x0100 /* buffer is on the cold > queue */ > #defineB_BC0x0200 /* buffer is managed by > the cache */ > #defineB_DMA
Re: db_trace.c: use __func__ instead of hardcoding filename
On Fri, Sep 9, 2016 at 11:38 AM, Jasper Lievisse Adriaansewrote: > Do we really want the filename to be printed in the message? If so we should > use __FILE__. On the other hand, having the function name makes more sense to > me. This is a "not found" message in response to an explicit user command: why does the user care what *source file* is generating that message? arm and sparc64 do the Right Thing IMO: if (p == NULL) { (*pr)("not found\n"); return; } Philip Guenther
Re: db_trace.c: use __func__ instead of hardcoding filename
On Fri, 09 Sep 2016 20:38:05 +0200, Jasper Lievisse Adriaanse wrote: > Do we really want the filename to be printed in the message? If so we should > use __FILE__. On the other hand, having the function name makes more sense to > me. Much more useful, OK millert@ - todd
db_trace.c: use __func__ instead of hardcoding filename
Hi, Do we really want the filename to be printed in the message? If so we should use __FILE__. On the other hand, having the function name makes more sense to me. OK? Index: amd64/amd64/db_trace.c === RCS file: /cvs/src/sys/arch/amd64/amd64/db_trace.c,v retrieving revision 1.20 diff -u -p -r1.20 db_trace.c --- amd64/amd64/db_trace.c 4 Sep 2016 09:22:28 - 1.20 +++ amd64/amd64/db_trace.c 9 Sep 2016 18:35:09 - @@ -174,7 +174,7 @@ db_stack_trace_print(db_expr_t addr, boo if (trace_proc) { struct proc *p = pfind((pid_t)addr); if (p == NULL) { - (*pr) ("db_trace.c: process not found\n"); + (*pr) ("%s: process not found\n", __func__); return; } frame = (struct callframe *)p->p_addr->u_pcb.pcb_rbp; Index: i386/i386/db_trace.c === RCS file: /cvs/src/sys/arch/i386/i386/db_trace.c,v retrieving revision 1.19 diff -u -p -r1.19 db_trace.c --- i386/i386/db_trace.c3 Mar 2016 12:44:09 - 1.19 +++ i386/i386/db_trace.c9 Sep 2016 18:35:09 - @@ -186,11 +186,11 @@ db_stack_trace_print(db_expr_t addr, boo frame = (struct callframe *)ddb_regs.tf_ebp; callpc = (db_addr_t)ddb_regs.tf_eip; } else if (trace_thread) { - (*pr) ("db_trace.c: can't trace thread\n"); + (*pr) ("%s: can't trace thread\n", __func__); } else if (trace_proc) { struct proc *p = pfind((pid_t)addr); if (p == NULL) { - (*pr) ("db_trace.c: process not found\n"); + (*pr) ("%s: process not found\n", __func__); return; } frame = (struct callframe *)p->p_addr->u_pcb.pcb_ebp; -- jasper
Do not call usbd_set_config_no()...
...and instead let the stack do it for you. Changing the configuration of a device implies fetching and parsing descriptors. If anything bad happen, most of the device below are left attached but unconfigured, and many of them do not even deactivate the device. Test reports and oks welcome. Index: if_atu.c === RCS file: /cvs/src/sys/dev/usb/if_atu.c,v retrieving revision 1.119 diff -u -p -r1.119 if_atu.c --- if_atu.c13 Apr 2016 11:03:37 - 1.119 +++ if_atu.c9 Sep 2016 15:39:39 - @@ -1102,7 +1102,7 @@ atu_match(struct device *parent, void *m struct usb_attach_arg *uaa = aux; int i; - if (!uaa->iface) + if (uaa->iface == NULL || uaa->configno != ATU_CONFIG_NO) return(UMATCH_NONE); for (i = 0; i < nitems(atu_devs); i++) { @@ -1252,13 +1252,6 @@ atu_attach(struct device *parent, struct sc->atu_unit = self->dv_unit; sc->atu_udev = dev; - - err = usbd_set_config_no(dev, ATU_CONFIG_NO, 1); - if (err) { - printf("%s: setting config no failed\n", - sc->atu_dev.dv_xname); - goto fail; - } err = usbd_device2interface_handle(dev, ATU_IFACE_IDX, >atu_iface); if (err) { Index: if_cue.c === RCS file: /cvs/src/sys/dev/usb/if_cue.c,v retrieving revision 1.75 diff -u -p -r1.75 if_cue.c --- if_cue.c13 Apr 2016 11:03:37 - 1.75 +++ if_cue.c9 Sep 2016 15:49:23 - @@ -415,7 +415,7 @@ cue_match(struct device *parent, void *m { struct usb_attach_arg *uaa = aux; - if (uaa->iface != NULL) + if (uaa->iface == NULL || uaa->configno != CUE_CONFIG_NO) return (UMATCH_NONE); return (usb_lookup(cue_devs, uaa->vendor, uaa->product) != NULL ? @@ -444,14 +444,6 @@ cue_attach(struct device *parent, struct DPRINTFN(5,(" : cue_attach: sc=%p, dev=%p", sc, dev)); sc->cue_udev = dev; - - err = usbd_set_config_no(dev, CUE_CONFIG_NO, 1); - if (err) { - printf("%s: setting config no failed\n", - sc->cue_dev.dv_xname); - return; - } - sc->cue_product = uaa->product; sc->cue_vendor = uaa->vendor; Index: if_mos.c === RCS file: /cvs/src/sys/dev/usb/if_mos.c,v retrieving revision 1.36 diff -u -p -r1.36 if_mos.c --- if_mos.c13 Apr 2016 11:03:37 - 1.36 +++ if_mos.c9 Sep 2016 15:40:38 - @@ -607,7 +607,7 @@ mos_match(struct device *parent, void *m { struct usb_attach_arg *uaa = aux; - if (!uaa->iface) + if (uaa->iface == NULL || uaa->configno != MOS_CONFIG_NO) return(UMATCH_NONE); return (mos_lookup(uaa->vendor, uaa->product) != NULL ? @@ -634,13 +634,6 @@ mos_attach(struct device *parent, struct sc->mos_udev = dev; sc->mos_unit = self->dv_unit; - - err = usbd_set_config_no(dev, MOS_CONFIG_NO, 1); - if (err) { - printf("%s: getting interface handle failed\n", - sc->mos_dev.dv_xname); - return; - } usb_init_task(>mos_tick_task, mos_tick_task, sc, USB_TASK_TYPE_GENERIC); Index: if_otus.c === RCS file: /cvs/src/sys/dev/usb/if_otus.c,v retrieving revision 1.53 diff -u -p -r1.53 if_otus.c --- if_otus.c 13 Apr 2016 11:03:37 - 1.53 +++ if_otus.c 9 Sep 2016 15:41:05 - @@ -176,7 +176,7 @@ otus_match(struct device *parent, void * { struct usb_attach_arg *uaa = aux; - if (uaa->iface != NULL) + if (uaa->iface == NULL || uaa->configno != 1) return UMATCH_NONE; return (usb_lookup(otus_devs, uaa->vendor, uaa->product) != NULL) ? @@ -198,12 +198,6 @@ otus_attach(struct device *parent, struc sc->amrr.amrr_min_success_threshold = 1; sc->amrr.amrr_max_success_threshold = 10; - - if (usbd_set_config_no(sc->sc_udev, 1, 0) != 0) { - printf("%s: could not set configuration no\n", - sc->sc_dev.dv_xname); - return; - } /* Get the first interface handle. */ error = usbd_device2interface_handle(sc->sc_udev, 0, >sc_iface); Index: if_ral.c === RCS file: /cvs/src/sys/dev/usb/if_ral.c,v retrieving revision 1.140 diff -u -p -r1.140 if_ral.c --- if_ral.c20 Jul 2016 10:24:43 - 1.140 +++ if_ral.c9 Sep 2016 15:41:31 - @@ -196,7 +196,7 @@ ural_match(struct device *parent, void * { struct usb_attach_arg *uaa = aux; - if (uaa->iface != NULL) + if (uaa->iface == NULL || uaa->configno != RAL_CONFIG_NO) return UMATCH_NONE;
Introduce usbd_parse_idesc()
Our USB stack is very lazy at validating descriptor. In order to reject device with malformed endpoint descriptors I'd like to introduce a new function, usbd_parse_idesc(), that I will extend with more checks. For now this function only contain the checks present in usbd_fill_iface_data() but it will be soon extended. ok? Index: usb_subr.c === RCS file: /cvs/src/sys/dev/usb/usb_subr.c,v retrieving revision 1.127 diff -u -p -r1.127 usb_subr.c --- usb_subr.c 2 Sep 2016 11:14:17 - 1.127 +++ usb_subr.c 9 Sep 2016 15:24:07 - @@ -73,6 +73,7 @@ usbd_status usbd_probe_and_attach(struct intusbd_printBCD(char *cp, size_t len, int bcd); void usb_free_device(struct usbd_device *); +intusbd_parse_idesc(struct usbd_device *, struct usbd_interface *); #ifdef USBVERBOSE #include @@ -495,66 +496,34 @@ usbd_find_edesc(usb_config_descriptor_t return (NULL); } -usbd_status -usbd_fill_iface_data(struct usbd_device *dev, int ifaceidx, int altidx) +int +usbd_parse_idesc(struct usbd_device *dev, struct usbd_interface *ifc) { - struct usbd_interface *ifc = >ifaces[ifaceidx]; - usb_interface_descriptor_t *idesc; +#define ed ((usb_endpoint_descriptor_t *)p) char *p, *end; - int endpt, nendpt; - - DPRINTFN(4,("%s: ifaceidx=%d altidx=%d\n", __func__, ifaceidx, altidx)); - - idesc = usbd_find_idesc(dev->cdesc, ifaceidx, altidx); - if (idesc == NULL) - return (USBD_INVAL); - - nendpt = idesc->bNumEndpoints; - DPRINTFN(4,("%s: found idesc nendpt=%d\n", __func__, nendpt)); - - ifc->device = dev; - ifc->idesc = idesc; - ifc->index = ifaceidx; - ifc->altindex = altidx; - ifc->endpoints = NULL; - ifc->priv = NULL; - LIST_INIT(>pipes); - - if (nendpt != 0) { - ifc->endpoints = mallocarray(nendpt, - sizeof(struct usbd_endpoint), M_USB, M_NOWAIT | M_ZERO); - if (ifc->endpoints == NULL) - return (USBD_NOMEM); - } + int i; p = (char *)ifc->idesc + ifc->idesc->bLength; end = (char *)dev->cdesc + UGETW(dev->cdesc->wTotalLength); -#define ed ((usb_endpoint_descriptor_t *)p) - for (endpt = 0; endpt < nendpt; endpt++) { - DPRINTFN(10,("%s: endpt=%d\n", __func__, endpt)); + + for (i = 0; i < ifc->idesc->bNumEndpoints; i++) { for (; p < end; p += ed->bLength) { - DPRINTFN(10,("%s: p=%p end=%p len=%d type=%d\n", - __func__, p, end, ed->bLength, - ed->bDescriptorType)); if (p + ed->bLength <= end && ed->bLength != 0 && ed->bDescriptorType == UDESC_ENDPOINT) - goto found; + break; + if (ed->bLength == 0 || ed->bDescriptorType == UDESC_INTERFACE) - break; + return (-1); } - /* passed end, or bad desc */ - printf("%s: bad descriptor(s): %s\n", __func__, - ed->bLength == 0 ? "0 length" : - ed->bDescriptorType == UDESC_INTERFACE ? "iface desc" : - "out of data"); - goto bad; - found: - ifc->endpoints[endpt].edesc = ed; + + if (p >= end) + return (-1); + if (dev->speed == USB_SPEED_HIGH) { - u_int mps; - /* Control and bulk endpoints have max packet - limits. */ + unsigned int mps; + + /* Control and bulk endpoints have max packet limits. */ switch (UE_GET_XFERTYPE(ed->bmAttributes)) { case UE_CONTROL: mps = USB_2_MAX_CTRL_PACKET; @@ -572,19 +541,55 @@ usbd_fill_iface_data(struct usbd_device break; } } - ifc->endpoints[endpt].refcnt = 0; - ifc->endpoints[endpt].savedtoggle = 0; + + ifc->endpoints[i].edesc = ed; + ifc->endpoints[i].refcnt = 0; + ifc->endpoints[i].savedtoggle = 0; p += ed->bLength; } + + return (0); #undef ed - return (USBD_NORMAL_COMPLETION); +} + +usbd_status +usbd_fill_iface_data(struct usbd_device *dev, int ifaceidx, int altidx) +{ + struct usbd_interface *ifc = >ifaces[ifaceidx]; + usb_interface_descriptor_t *idesc; + int nendpt; + + DPRINTFN(4,("%s: ifaceidx=%d altidx=%d\n", __func__, ifaceidx, altidx)); + + idesc = usbd_find_idesc(dev->cdesc, ifaceidx, altidx); + if (idesc
[t...@math.ethz.ch: Re: fsdb pledge]
I think the patch below is a bit cleaner than the one I committed. ok? - Forwarded message from Theo Buehler- Date: Sun, 21 Aug 2016 03:27:49 +0200 From: Theo Buehler To: m...@openbsd.org Subject: Re: fsdb pledge > this is kinda gross, but ok for now. I agree that it isn't very nice. How about this version that passes a flag to setup()? Unless I'm missing something, the only reason for the pledges being conditional on !hotroot() is the mount(2) call in fsck_ffs/main.c:336, where a ro root filesystem is remounted rw in the hotroot() case. In fsdb there's no such remounting, so this conditional pledge wasn't actually right for fsdb anyway. Index: fsdb/fsdb.c === RCS file: /var/cvs/src/sbin/fsdb/fsdb.c,v retrieving revision 1.30 diff -u -p -r1.30 fsdb.c --- fsdb/fsdb.c 7 Jun 2016 01:29:38 - 1.30 +++ fsdb/fsdb.c 21 Aug 2016 00:27:59 - @@ -103,7 +103,7 @@ main(int argc, char *argv[]) } if (fsys == NULL) usage(); - if (!setup(fsys)) + if (!setup(fsys, 1)) errx(1, "cannot set up file system `%s'", fsys); printf("Editing file system `%s'\nLast Mounted on %s\n", fsys, sblock.fs_fsmnt); Index: fsck_ffs/extern.h === RCS file: /var/cvs/src/sbin/fsck_ffs/extern.h,v retrieving revision 1.13 diff -u -p -r1.13 extern.h --- fsck_ffs/extern.h 6 Sep 2014 04:05:40 - 1.13 +++ fsck_ffs/extern.h 21 Aug 2016 00:30:20 - @@ -69,7 +69,7 @@ void pinode(ino_t); void propagate(ino_t); intreply(char *); void setinodebuf(ino_t); -intsetup(char *); +intsetup(char *, int); union dinode * getnextinode(ino_t); void catch(int); void catchquit(int); Index: fsck_ffs/main.c === RCS file: /var/cvs/src/sbin/fsck_ffs/main.c,v retrieving revision 1.49 diff -u -p -r1.49 main.c --- fsck_ffs/main.c 24 Nov 2015 21:42:54 - 1.49 +++ fsck_ffs/main.c 21 Aug 2016 00:26:53 - @@ -170,7 +170,7 @@ checkfilesys(char *filesys, char *mntpt, if (debug && preen) pwarn("starting\n"); - switch (setup(filesys)) { + switch (setup(filesys, 0)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); Index: fsck_ffs/setup.c === RCS file: /var/cvs/src/sbin/fsck_ffs/setup.c,v retrieving revision 1.61 diff -u -p -r1.61 setup.c --- fsck_ffs/setup.c20 Aug 2016 15:04:21 - 1.61 +++ fsck_ffs/setup.c21 Aug 2016 00:40:20 - @@ -77,7 +77,7 @@ static const int sbtry[] = SBLOCKSEARCH; static const int altsbtry[] = { 32, 64, 128, 144, 160, 192, 256 }; int -setup(char *dev) +setup(char *dev, int isfsdb) { long cg, size, asked, i, j; size_t bmapsize; @@ -102,7 +102,7 @@ setup(char *dev) strlcpy(rdevname, realdev, sizeof(rdevname)); setcdevname(rdevname, dev, preen); - if (!hotroot()) + if (isfsdb || !hotroot()) if (pledge("stdio rpath wpath getpw tty disklabel", NULL) == -1) err(1, "pledge"); @@ -145,15 +145,13 @@ setup(char *dev) else secsize = DEV_BSIZE; - if (!hotroot()) { + if (isfsdb) { + if (pledge("stdio rpath getpw tty", NULL) == -1) + err(1, "pledge"); + } else if (!hotroot()) { #ifndef SMALL - if (strcmp("fsdb", getprogname()) == 0) { - if (pledge("stdio rpath getpw tty", NULL) == -1) - err(1, "pledge"); - } else { - if (pledge("stdio getpw", NULL) == -1) - err(1, "pledge"); - } + if (pledge("stdio getpw", NULL) == -1) + err(1, "pledge"); #else if (pledge("stdio", NULL) == -1) err(1, "pledge"); - End forwarded message -
Re: Drop main() prototype
On 05/09/2016 11:05, Marc Espie wrote: > On Mon, Sep 05, 2016 at 04:04:23AM -0400, Ted Unangst wrote: >> Sevan Janiyan wrote: >>> Hello, >>> Attached patches remove the main() prototype from >>> src/{sbin,usr.bin,usb.sbin} >> >> yes! > > I'm 100% certain I added those prototypes because some version of gcc > with the appropriate warning flags would flag main as missing a prototype. > > If that era is now gone, I'm all too happy for it to vanish. Happy to do a build run with the relevant flags, need to fish those out from history if they do not come to mind after all this time :) Sevan
Re: smtpd config parsing cleanup
> On 09 Sep 2016, at 13:35, Eric Faurotwrote: > > Because of the small ad hoc changes that were made here and there over > the years, the listener config code has become a bit convoluted I think. Yes. > Here is a little cleanup to: > > - have all listener creation functions take listen_opts as param, > and call config_listener() when done, which adds the listener(s) > to the current config list of listeners. > > - make the fallback chain between interface(), host_v4() host_v6() > and host_dns() obvious when creating an if_listener. > > - fix a bug where the specified family was ignored if the listener > is given as a hostname. > > > Comments? I like that. ok jung@ > Eric. > > Index: parse.y > === > RCS file: /cvs/src/usr.sbin/smtpd/parse.y,v > retrieving revision 1.189 > diff -u -p -r1.189 parse.y > --- parse.y 31 Aug 2016 15:24:04 - 1.189 > +++ parse.y 9 Sep 2016 11:11:54 - > @@ -138,15 +138,14 @@ static struct listen_opts { > uint32_toptions; > } listen_opts; > > -static struct listener *create_sock_listener(struct listen_opts *); > -static void create_if_listener(struct listenerlist *, struct > listen_opts *); > -static void config_listener(struct listener *, struct listen_opts > *); > - > -struct listener *host_v4(const char *, in_port_t); > -struct listener *host_v6(const char *, in_port_t); > -int host_dns(struct listenerlist *, struct listen_opts *); > -int host(struct listenerlist *, struct listen_opts *); > -int interface(struct listenerlist *, struct listen_opts *); > +static void create_sock_listener(struct listen_opts *); > +static void create_if_listener(struct listen_opts *); > +static void config_listener(struct listener *, struct listen_opts *); > +static int host_v4(struct listen_opts *); > +static int host_v6(struct listen_opts *); > +static int host_dns(struct listen_opts *); > +static int interface(struct listen_opts *); > + > void set_local(const char *); > void set_localaddrs(struct table *); > intdelaytonum(char *); > @@ -695,13 +694,13 @@ socket_listener : SOCKET sock_listen { > yyerror("socket listener already configured"); > YYERROR; > } > - conf->sc_sock_listener = > create_sock_listener(_opts); > + create_sock_listener(_opts); > } > ; > > if_listener : STRING if_listen { > listen_opts.ifx = $1; > - create_if_listener(conf->sc_listeners, _opts); > + create_if_listener(_opts); > } > ; > > @@ -2005,7 +2004,7 @@ parse_config(struct smtpd *x_conf, const > /* If the socket listener was not configured, create a default one. */ > if (!conf->sc_sock_listener) { > memset(_opts, 0, sizeof listen_opts); > - conf->sc_sock_listener = create_sock_listener(_opts); > + create_sock_listener(_opts); > } > > /* Free macros and check which have not been used. */ > @@ -2109,7 +2108,7 @@ symget(const char *nam) > return (NULL); > } > > -static struct listener * > +static void > create_sock_listener(struct listen_opts *lo) > { > struct listener *l = xcalloc(1, sizeof(*l), "create_sock_listener"); > @@ -2118,13 +2117,12 @@ create_sock_listener(struct listen_opts > l->ss.ss_family = AF_LOCAL; > l->ss.ss_len = sizeof(struct sockaddr *); > l->local = 1; > + conf->sc_sock_listener = l; > config_listener(l, lo); > - > - return (l); > } > > static void > -create_if_listener(struct listenerlist *ll, struct listen_opts *lo) > +create_if_listener(struct listen_opts *lo) > { > uint16_tflags; > > @@ -2142,17 +2140,11 @@ create_if_listener(struct listenerlist * > if (lo->port) { > lo->flags = lo->ssl|lo->auth|flags; > lo->port = htons(lo->port); > - if (!interface(ll, lo)) > - if (host(ll, lo) <= 0) > - errx(1, "invalid virtual ip or interface: %s", > lo->ifx); > } > else { > if (lo->ssl & F_SMTPS) { > lo->port = htons(465); > lo->flags = F_SMTPS|lo->auth|flags; > - if (!interface(ll, lo)) > - if (host(ll, lo) <= 0) > - errx(1, "invalid virtual ip or > interface: %s", lo->ifx); > } > > if (!lo->ssl || (lo->ssl & F_STARTTLS)) { > @@ -2160,11 +2152,19 @@ create_if_listener(struct listenerlist * > lo->flags = lo->auth|flags; > if (lo->ssl & F_STARTTLS) > lo->flags |=
smtpd config parsing cleanup
Because of the small ad hoc changes that were made here and there over the years, the listener config code has become a bit convoluted I think. Here is a little cleanup to: - have all listener creation functions take listen_opts as param, and call config_listener() when done, which adds the listener(s) to the current config list of listeners. - make the fallback chain between interface(), host_v4() host_v6() and host_dns() obvious when creating an if_listener. - fix a bug where the specified family was ignored if the listener is given as a hostname. Comments? Eric. Index: parse.y === RCS file: /cvs/src/usr.sbin/smtpd/parse.y,v retrieving revision 1.189 diff -u -p -r1.189 parse.y --- parse.y 31 Aug 2016 15:24:04 - 1.189 +++ parse.y 9 Sep 2016 11:11:54 - @@ -138,15 +138,14 @@ static struct listen_opts { uint32_toptions; } listen_opts; -static struct listener *create_sock_listener(struct listen_opts *); -static void create_if_listener(struct listenerlist *, struct listen_opts *); -static void config_listener(struct listener *, struct listen_opts *); - -struct listener*host_v4(const char *, in_port_t); -struct listener*host_v6(const char *, in_port_t); -int host_dns(struct listenerlist *, struct listen_opts *); -int host(struct listenerlist *, struct listen_opts *); -int interface(struct listenerlist *, struct listen_opts *); +static voidcreate_sock_listener(struct listen_opts *); +static voidcreate_if_listener(struct listen_opts *); +static voidconfig_listener(struct listener *, struct listen_opts *); +static int host_v4(struct listen_opts *); +static int host_v6(struct listen_opts *); +static int host_dns(struct listen_opts *); +static int interface(struct listen_opts *); + voidset_local(const char *); voidset_localaddrs(struct table *); int delaytonum(char *); @@ -695,13 +694,13 @@ socket_listener : SOCKET sock_listen { yyerror("socket listener already configured"); YYERROR; } - conf->sc_sock_listener = create_sock_listener(_opts); + create_sock_listener(_opts); } ; if_listener: STRING if_listen { listen_opts.ifx = $1; - create_if_listener(conf->sc_listeners, _opts); + create_if_listener(_opts); } ; @@ -2005,7 +2004,7 @@ parse_config(struct smtpd *x_conf, const /* If the socket listener was not configured, create a default one. */ if (!conf->sc_sock_listener) { memset(_opts, 0, sizeof listen_opts); - conf->sc_sock_listener = create_sock_listener(_opts); + create_sock_listener(_opts); } /* Free macros and check which have not been used. */ @@ -2109,7 +2108,7 @@ symget(const char *nam) return (NULL); } -static struct listener * +static void create_sock_listener(struct listen_opts *lo) { struct listener *l = xcalloc(1, sizeof(*l), "create_sock_listener"); @@ -2118,13 +2117,12 @@ create_sock_listener(struct listen_opts l->ss.ss_family = AF_LOCAL; l->ss.ss_len = sizeof(struct sockaddr *); l->local = 1; + conf->sc_sock_listener = l; config_listener(l, lo); - - return (l); } static void -create_if_listener(struct listenerlist *ll, struct listen_opts *lo) +create_if_listener(struct listen_opts *lo) { uint16_tflags; @@ -2142,17 +2140,11 @@ create_if_listener(struct listenerlist * if (lo->port) { lo->flags = lo->ssl|lo->auth|flags; lo->port = htons(lo->port); - if (!interface(ll, lo)) - if (host(ll, lo) <= 0) - errx(1, "invalid virtual ip or interface: %s", lo->ifx); } else { if (lo->ssl & F_SMTPS) { lo->port = htons(465); lo->flags = F_SMTPS|lo->auth|flags; - if (!interface(ll, lo)) - if (host(ll, lo) <= 0) - errx(1, "invalid virtual ip or interface: %s", lo->ifx); } if (!lo->ssl || (lo->ssl & F_STARTTLS)) { @@ -2160,11 +2152,19 @@ create_if_listener(struct listenerlist * lo->flags = lo->auth|flags; if (lo->ssl & F_STARTTLS) lo->flags |= F_STARTTLS; - if (!interface(ll, lo)) - if (host(ll, lo) <= 0) - errx(1, "invalid virtual ip or interface:
Re: reduce double caching in mfs
On 08/09/16(Thu) 14:49, Ted Unangst wrote: > Currently, the bufcache doesn't know that mfs is backed by memory. All i/o to > mfs ends up being double cached, once in the userland process and again in the > kernel bufcache. This is wasteful. In particular, it means one can't use mfs > to increase the effective size of the buffer cache. Reading or writing to mfs > will push out buffers used for disk caching. (I think you can even end up with > triple buffering when mfs starts swapping...) Isn't the solution to this problem a working dynamic buffer cache? I'm not sure adding a hack for mfs, and the complexity that comes with it, is the way to go. Did somebody analyzed what broke when the buffer cache was cranked to 90%? > This is mostly inherent to the design of mfs. But with a small tweak to the > buffer cache, we can improve the situation. This introduces the concept of > 'cheap' buffers, a hint to the buffer cache that they are easily replaced. > (There's a 'nocache' flag already, but it's not suitable here.) When mfs > finishes with a buf, it marks it cheap. Then it goes onto a special queue that > gets chewed up before we start looking at the regular caches. We still cache > some number of cheap buffers to prevent constant memory copying. > > With this diff, I've confirmed that reading/writing large files to mfs doesn't > flush the cache, but performance appears about the same. (Of particular note, > my bufcache is big enough to cache all of src/, but not src/ and obj/. With > obj/ on mfs, src never gets flushed.) > > Index: kern/vfs_bio.c > === > RCS file: /cvs/src/sys/kern/vfs_bio.c,v > retrieving revision 1.176 > diff -u -p -r1.176 vfs_bio.c > --- kern/vfs_bio.c4 Sep 2016 10:51:24 - 1.176 > +++ kern/vfs_bio.c8 Sep 2016 18:31:52 - > @@ -93,7 +93,10 @@ int bd_req;/* Sleep point for > cleaner > > #define NUM_CACHES 2 > #define DMA_CACHE 0 > +#define CHEAP_LIMIT 256 > struct bufcache cleancache[NUM_CACHES]; > +struct bufqueue cheapqueue; > +u_int cheapqueuelen; > struct bufqueue dirtyqueue; > > void > @@ -1297,6 +1300,7 @@ bufcache_init(void) > TAILQ_INIT([i].coldqueue); > TAILQ_INIT([i].warmqueue); > } > + TAILQ_INIT(); > TAILQ_INIT(); > } > > @@ -1329,6 +1333,12 @@ bufcache_getcleanbuf(int cachenum, int d > > splassert(IPL_BIO); > > + /* try cheap queue if over limit */ > + if (discard || cheapqueuelen > CHEAP_LIMIT) { > + if ((bp = TAILQ_FIRST())) > + return bp; > + } > + > /* try cold queue */ > while ((bp = TAILQ_FIRST(>coldqueue))) { > if ((!discard) && > @@ -1356,6 +1366,8 @@ bufcache_getcleanbuf(int cachenum, int d > /* buffer is cold - give it up */ > return bp; > } > + if ((bp = TAILQ_FIRST())) > + return bp; > if ((bp = TAILQ_FIRST(>warmqueue))) > return bp; > if ((bp = TAILQ_FIRST(>hotqueue))) > @@ -1410,6 +1422,13 @@ bufcache_take(struct buf *bp) > pages = atop(bp->b_bufsize); > struct bufcache *cache = [bp->cache]; > if (!ISSET(bp->b_flags, B_DELWRI)) { > + if (ISSET(bp->b_flags, B_CHEAP)) { > + TAILQ_REMOVE(, bp, b_freelist); > + cheapqueuelen--; > + CLR(bp->b_flags, B_CHEAP); > + return; > + } > + > if (ISSET(bp->b_flags, B_COLD)) { > queue = >coldqueue; > } else if (ISSET(bp->b_flags, B_WARM)) { > @@ -1462,11 +1481,17 @@ bufcache_release(struct buf *bp) > struct bufqueue *queue; > int64_t pages; > struct bufcache *cache = [bp->cache]; > + > pages = atop(bp->b_bufsize); > KASSERT(ISSET(bp->b_flags, B_BC)); > KASSERT((ISSET(bp->b_flags, B_DMA) && bp->cache == 0) > || ((!ISSET(bp->b_flags, B_DMA)) && bp->cache > 0)); > if (!ISSET(bp->b_flags, B_DELWRI)) { > + if (ISSET(bp->b_flags, B_CHEAP)) { > + TAILQ_INSERT_TAIL(, bp, b_freelist); > + cheapqueuelen++; > + return; > + } > int64_t *queuepages; > if (ISSET(bp->b_flags, B_WARM | B_COLD)) { > SET(bp->b_flags, B_WARM); > Index: sys/buf.h > === > RCS file: /cvs/src/sys/sys/buf.h,v > retrieving revision 1.103 > diff -u -p -r1.103 buf.h > --- sys/buf.h 23 May 2016 09:31:28 - 1.103 > +++ sys/buf.h 8 Sep 2016 17:20:12 - > @@ -221,12 +221,14 @@ struct bufcache { > #define B_COLD 0x0100 /* buffer is on the cold queue > */ > #define B_BC0x0200 /* buffer is managed by the > cache */ > #define B_DMA 0x0400 /*