Re: wol support for bge

2014-04-23 Thread Mark Kettenis
 Date: Wed, 23 Apr 2014 11:43:06 -0500
 From: Abel Abraham Camarillo Ojeda acam...@verlet.org
 
 This should work on sparc64?

Unlikely.



Re: ddb, USB keyboard and Apple machines.

2014-04-26 Thread Mark Kettenis
 Date: Sat, 26 Apr 2014 17:57:30 +0200
 From: Martin Pieuchot mpieuc...@nolizard.org
 
 It's actually impossible to use a USB keyboard to enter ddb(8) on most
 of the G3/G4 that come with such keyboard since they have a bluetooth
 HID device that attaches as the console keyboard.
 
 I assume this is also the case on various x86 Apple laptops, but I don't
 have any hardware to test.
 
 The diff below works around that and lets me use my keyboard in DDB.
 
 ok?

I believe that the bluetooth HID device is there to support wireless
keyboards.  This diff will probably break using those as the console
keyboard.  Probably not really an issue as:

1. Wired keyboard users probably still outnumber wireless keyboard users.

2. Wireless keyboards should be considered insecure, so not attaching
   these as the console keyboard might be considered a good thing.


 Index: ukbd.c
 ===
 RCS file: /cvs/src/sys/dev/usb/ukbd.c,v
 retrieving revision 1.65
 diff -u -p -r1.65 ukbd.c
 --- ukbd.c24 Apr 2014 09:40:28 -  1.65
 +++ ukbd.c26 Apr 2014 14:10:09 -
 @@ -212,6 +212,7 @@ ukbd_attach(struct device *parent, struc
   struct usb_hid_descriptor *hid;
   u_int32_t qflags;
   int dlen, repid;
 + int console = 1;
   void *desc;
   kbd_t layout = (kbd_t)-1;
  
 @@ -226,8 +227,16 @@ ukbd_attach(struct device *parent, struc
   sc-sc_hdev.sc_osize = hid_report_size(desc, dlen, hid_output, repid);
   sc-sc_hdev.sc_fsize = hid_report_size(desc, dlen, hid_feature, repid);
  
 +  /*
 +   * Since the HID-Proxy is always detected before any
 +   * real keyboard, do not let it grab the console.
 +   */
 + if (uha-uaa-vendor == USB_VENDOR_APPLE 
 + uha-uaa-product == USB_PRODUCT_APPLE_BLUETOOTH_HCI)
 + console = 0;
 +
   qflags = usbd_get_quirks(sc-sc_hdev.sc_udev)-uq_flags;
 - if (hidkbd_attach(self, kbd, 1, qflags, repid, desc, dlen) != 0)
 + if (hidkbd_attach(self, kbd, console, qflags, repid, desc, dlen) != 0)
   return;
  
   if (uha-uaa-vendor == USB_VENDOR_APPLE) {
 
 



acpi interrupt routing diff

2014-04-27 Thread Mark Kettenis
The current acpiprt(4) code doesn't handle non-standard polarity and
trigger mode correctly.  Typically not a problem for real hardware,
but some virtualization stuff gets creative.  The diff below tries to
do a better job.  It fixes the qemu power button.  But before I can
commit this, it needs testing on more than just my thinkpad laptops.

So please give this a spin on your hardware, especially servers and
desktop machines.


Index: acpimadt.c
===
RCS file: /cvs/src/sys/dev/acpi/acpimadt.c,v
retrieving revision 1.26
diff -u -p -r1.26 acpimadt.c
--- acpimadt.c  7 Jan 2012 20:13:16 -   1.26
+++ acpimadt.c  27 Apr 2014 15:57:31 -
@@ -216,8 +216,7 @@ acpimadt_attach(struct device *parent, s
arg.type = AML_OBJTYPE_INTEGER;
arg.v_integer = 1;
 
-   if (aml_evalname(acpi_sc, NULL, \\_PIC, 1, arg, NULL) != 0)
-   return;
+   aml_evalname(acpi_sc, NULL, \\_PIC, 1, arg, NULL);
 
mp_busses = acpimadt_busses;
mp_nbusses = nitems(acpimadt_busses);
Index: acpiprt.c
===
RCS file: /cvs/src/sys/dev/acpi/acpiprt.c,v
retrieving revision 1.44
diff -u -p -r1.44 acpiprt.c
--- acpiprt.c   22 Dec 2013 18:55:25 -  1.44
+++ acpiprt.c   27 Apr 2014 15:57:31 -
@@ -41,6 +41,13 @@
 
 #include ioapic.h
 
+struct acpiprt_irq {
+   int _int;
+   int _shr;
+   int _ll;
+   int _he;
+};
+
 struct acpiprt_map {
int bus, dev;
int pin;
@@ -134,16 +141,29 @@ acpiprt_attach(struct device *parent, st
 int
 acpiprt_getirq(union acpi_resource *crs, void *arg)
 {
-   int *irq = (int *)arg;
-   int typ;
+   struct acpiprt_irq *irq = arg;
+   int typ, len;
+
+   irq-_shr = 0;
+   irq-_ll = 0;
+   irq-_he = 1;
 
typ = AML_CRSTYPE(crs);
+   len = AML_CRSLEN(crs);
switch (typ) {
case SR_IRQ:
-   *irq = ffs(letoh16(crs-sr_irq.irq_mask)) - 1;
+   irq-_int= ffs(letoh16(crs-sr_irq.irq_mask)) - 1;
+   if (len  2) {
+   irq-_shr = (crs-sr_irq.irq_flags  SR_IRQ_SHR);
+   irq-_ll = (crs-sr_irq.irq_flags  SR_IRQ_POLARITY);
+   irq-_he = (crs-sr_irq.irq_flags  SR_IRQ_MODE);
+   }
break;
case LR_EXTIRQ:
-   *irq = letoh32(crs-lr_extirq.irq[0]);
+   irq-_int = letoh32(crs-lr_extirq.irq[0]);
+   irq-_shr = (crs-lr_extirq.flags  LR_EXTIRQ_SHR);
+   irq-_ll = (crs-lr_extirq.flags  LR_EXTIRQ_POLARITY);
+   irq-_he = (crs-lr_extirq.flags  LR_EXTIRQ_MODE);
break;
default:
printf(unknown interrupt: %x\n, typ);
@@ -174,35 +194,48 @@ acpiprt_pri[16] = {
 int
 acpiprt_chooseirq(union acpi_resource *crs, void *arg)
 {
-   int *irq = (int *)arg;
-   int typ, i, pri = -1;
+   struct acpiprt_irq *irq = arg;
+   int typ, len, i, pri = -1;
+
+   irq-_shr = 0;
+   irq-_ll = 0;
+   irq-_he = 1;
 
typ = AML_CRSTYPE(crs);
+   len = AML_CRSLEN(crs);
switch (typ) {
case SR_IRQ:
for (i = 0; i  sizeof(crs-sr_irq.irq_mask) * 8; i++) {
if (crs-sr_irq.irq_mask  (1  i) 
acpiprt_pri[i]  pri) {
-   *irq = i;
-   pri = acpiprt_pri[*irq];
+   irq-_int = i;
+   pri = acpiprt_pri[irq-_int];
}
}
+   if (len  2) {
+   irq-_shr = (crs-sr_irq.irq_flags  SR_IRQ_SHR);
+   irq-_ll = (crs-sr_irq.irq_flags  SR_IRQ_POLARITY);
+   irq-_he = (crs-sr_irq.irq_flags  SR_IRQ_MODE);
+   }
break;
case LR_EXTIRQ:
/* First try non-8259 interrupts. */
for (i = 0; i  crs-lr_extirq.irq_count; i++) {
if (crs-lr_extirq.irq[i]  15) {
-   *irq = crs-lr_extirq.irq[i];
+   irq-_int = crs-lr_extirq.irq[i];
return (0);
}
}
 
for (i = 0; i  crs-lr_extirq.irq_count; i++) {
if (acpiprt_pri[crs-lr_extirq.irq[i]]  pri) {
-   *irq = crs-lr_extirq.irq[i];
-   pri = acpiprt_pri[*irq];
+   irq-_int = crs-lr_extirq.irq[i];
+   pri = acpiprt_pri[irq-_int];
}
}
+   irq-_shr = (crs-lr_extirq.flags  LR_EXTIRQ_SHR);
+   irq-_ll = (crs-lr_extirq.flags  LR_EXTIRQ_POLARITY);
+   irq-_he = (crs-lr_extirq.flags  LR_EXTIRQ_MODE);
break;

Re: Switch getopt example to getprogname()

2014-04-29 Thread Mark Kettenis
 Date: Tue, 29 Apr 2014 11:31:48 +0200
 From: Tristan Le Guern tlegu...@bouledef.eu
 
 Hi,
 
 This patch for /usr/share/misc/getopt enforces the use of getprogname()
 instead of __progname.
 
 Is this desirable? If so I also have a patch for style(9).

getprogname(3) isn't really more portable than __progname, and the
latter is probably slightly more efficient (smaller code).  So I don't
really see the point.



Re: data modified on freelist, tmpfs-related?

2014-04-30 Thread Mark Kettenis
 Date: Wed, 30 Apr 2014 13:39:20 +0100
 From: Stuart Henderson st...@openbsd.org
 
 Seen when running e2fsprogs regression tests with /tmp on tmpfs

I'm not surprised; tmpfs contains some serious bugs.  I recommend not
using it until those are fixed.

 Data modified on freelist: word -35183628471970 of object 0x80d36c00 
 size 0x400 previous type free (invalid addr 0xf40858de1f81cbe9)
 panic: Data modified on freelist: word 4 of object 0x80d36c00 size 
 0x400 previous type free (0x0 != 0xdeafbead)
 
 Stopped at  Debugger+0x5:   leave   
 RUN AT LEAST 'trace' AND 'ps' AND INCLUDE OUTPUT WHEN REPORTING THIS PANIC!
 IF RUNNING SMP, USE 'mach ddbcpu #' AND 'trace' ON OTHER PROCESSORS, TOO.
 DO NOT EVEN BOTHER REPORTING THIS WITHOUT INCLUDING THAT INFORMATION!
 ddb{1} Debugger() at Debugger+0x5
 panic() at panic+0xfe
 malloc() at malloc+0x697
 hashinit() at hashinit+0x3b
 uao_grow_hash() at uao_grow_hash+0x70
 tmpfs_reg_resize() at tmpfs_reg_resize+0xe4
 tmpfs_write() at tmpfs_write+0xfd
 VOP_WRITE() at VOP_WRITE+0x3f
 vn_write() at vn_write+0x98
 dofilewritev() at dofilewritev+0x1c5
 sys_write() at sys_write+0xaa
 syscall() at syscall+0x297
 --- syscall (number 4) ---
 end of kernel
 end trace frame: 0x7f7cfd50, count: -12
 0xb25a740770a:
 ddb{1} ds 0x296
 es0x6930
 fs0x6900
 gs0xd1ee
 rdi  0x1
 rsi0x296
 rbp   0x800033276920
 rbx   0x817dac70seltrue_filtops+0xa10
 rdx0
 rcx   0x801c7000
 rax  0x1
 r80x800033276840
 r9 0
 r100
 r110
 r120x100
 r13   0x800033276930
 r14  0xa
 r15  0x5
 rip   0x813403e5Debugger+0x5
 cs   0x8
 rflags 0x202
 rsp   0x800033276920
 ss  0x10
 Debugger+0x5:   leave   
 
 The end of the dmesg buffer was overwritten by kernel output from
 the following boot, but I have screenshots of bcstats, uvmexp and
 the first page of ps output (active process is gunzip) here:
 
 https://drive.google.com/folderview?id=0B8t-sinTZPnucERodGdCcTc2Mmc&usp=sharing
 
 OpenBSD 5.5-current (GENERIC.MP) #6: Sun Apr 27 14:42:50 BST 2014
 st...@bamboo.spacehopper.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
 real mem = 8451125248 (8059MB)
 avail mem = 8217436160 (7836MB)
 mpath0 at root
 scsibus0 at mpath0: 256 targets
 mainbus0 at root
 bios0 at mainbus0: SMBIOS rev. 2.6 @ 0xdae9c000 (66 entries)
 bios0: vendor LENOVO version 8DET63WW (1.33 ) date 07/19/2012
 bios0: LENOVO 4287CTO
 acpi0 at bios0: rev 2
 acpi0: sleep states S0 S3 S4 S5
 acpi0: tables DSDT FACP SLIC SSDT SSDT SSDT HPET APIC MCFG ECDT ASF! TCPA 
 SSDT SSDT UEFI UEFI UEFI
 acpi0: wakeup devices LID_(S3) SLPB(S3) IGBE(S4) EXP4(S4) EXP7(S4) EHC1(S3) 
 EHC2(S3) HDEF(S4)
 acpitimer0 at acpi0: 3579545 Hz, 24 bits
 acpihpet0 at acpi0: 14318179 Hz
 acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
 cpu0 at mainbus0: apid 0 (boot processor)
 cpu0: Intel(R) Core(TM) i7-2640M CPU @ 2.80GHz, 2791.30 MHz
 cpu0: 
 FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,NXE,LONG,LAHF,PERF,ITSC
 cpu0: 256KB 64b/line 8-way L2 cache
 cpu0: smt 0, core 0, package 0
 mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
 cpu0: apic clock running at 99MHz
 cpu0: mwait min=64, max=64, C-substates=0.2.1.1.2, IBE
 cpu1 at mainbus0: apid 1 (application processor)
 cpu1: Intel(R) Core(TM) i7-2640M CPU @ 2.80GHz, 2790.94 MHz
 cpu1: 
 FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,NXE,LONG,LAHF,PERF,ITSC
 cpu1: 256KB 64b/line 8-way L2 cache
 cpu1: smt 1, core 0, package 0
 cpu2 at mainbus0: apid 2 (application processor)
 cpu2: Intel(R) Core(TM) i7-2640M CPU @ 2.80GHz, 2790.94 MHz
 cpu2: 
 FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,NXE,LONG,LAHF,PERF,ITSC
 cpu2: 256KB 64b/line 8-way L2 cache
 cpu2: smt 0, core 1, package 0
 cpu3 at mainbus0: apid 3 (application processor)
 cpu3: Intel(R) Core(TM) i7-2640M CPU @ 2.80GHz, 2790.94 MHz
 cpu3: 
 

Re: data modified on freelist, tmpfs-related?

2014-04-30 Thread Mark Kettenis
 From: Mike Belopuhov m...@belopuhov.com
 Date: Wed, 30 Apr 2014 16:00:45 +0200
 
 On 30 April 2014 15:55, Mark Kettenis mark.kette...@xs4all.nl wrote:
  Date: Wed, 30 Apr 2014 15:38:39 +0200 (CEST)
  From: Mark Kettenis mark.kette...@xs4all.nl
 
   Date: Wed, 30 Apr 2014 13:39:20 +0100
   From: Stuart Henderson st...@openbsd.org
  
   Seen when running e2fsprogs regression tests with /tmp on tmpfs
 
  I'm not surprised; tmpfs contains some serious bugs.  I recommend not
  using it until those are fixed.
 
  Which means, I'd like somebody else besides espie@ to comment on my
  uvm_aobj.c list manipulation hack diff.
 
 
 Diff made sense to me when I looked at it, but I would rather hide
 direct pointer access :/  Perhaps LIST_SWAP does a tiny bit more,
 but it's cleaner and perhaps can be useful in the future.

I'm not comfortable with introducing more sys/queue.h APIs.  So
perhaps we should just punt on the optimization and remove/insert all
list items.  Removing the trap comments that pedro set up...

Index: uvm_aobj.c
===
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
retrieving revision 1.61
diff -u -p -r1.61 uvm_aobj.c
--- uvm_aobj.c  13 Apr 2014 23:14:15 -  1.61
+++ uvm_aobj.c  30 Apr 2014 14:52:33 -
@@ -431,6 +431,7 @@ uao_shrink_hash(struct uvm_object *uobj,
 {
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
struct uao_swhash *new_swhash;
+   struct uao_swhash_elt *elt;
unsigned long new_hashmask;
int i;
 
@@ -456,8 +457,13 @@ uao_shrink_hash(struct uvm_object *uobj,
 * Even though the hash table size is changing, the hash of the buckets
 * we are interested in copying should not change.
 */
-   for (i = 0; i  UAO_SWHASH_BUCKETS(aobj-u_pages); i++)
-   LIST_FIRST(new_swhash[i]) = LIST_FIRST(aobj-u_swhash[i]);
+   for (i = 0; i  UAO_SWHASH_BUCKETS(aobj-u_pages); i++) {
+   while (LIST_EMPTY(aobj-u_swhash[i]) == 0) {
+   elt = LIST_FIRST(aobj-u_swhash[i]);
+   LIST_REMOVE(elt, list);
+   LIST_INSERT_HEAD(new_swhash[i], elt, list);
+   }
+   }
 
free(aobj-u_swhash, M_UVMAOBJ);
 
@@ -609,7 +615,6 @@ uao_grow_hash(struct uvm_object *uobj, i
return ENOMEM;
 
for (i = 0; i  UAO_SWHASH_BUCKETS(aobj-u_pages); i++) {
-   /* XXX pedro: shouldn't copying the list pointers be enough? */
while (LIST_EMPTY(aobj-u_swhash[i]) == 0) {
elt = LIST_FIRST(aobj-u_swhash[i]);
LIST_REMOVE(elt, list);





Re: What's the pid of the pagedaemon - intro(2)

2014-05-02 Thread Mark Kettenis
 From: j...@wxcvbn.org (=?utf-8?Q?J=C3=A9r=C3=A9mie_Courr=C3=A8ges-Anglas?=)
 Date: Fri, 02 May 2014 10:17:45 +0200
 
 I don't know when this changed, but the information below seems no
 longer relevant.
 
 ok?

Kill it!

 Index: intro.2
 ===
 RCS file: /cvs/src/lib/libc/sys/intro.2,v
 retrieving revision 1.48
 diff -u -p -p -u -r1.48 intro.2
 --- intro.2   21 Jan 2014 03:15:45 -  1.48
 +++ intro.2   30 Apr 2014 09:40:22 -
 @@ -556,7 +556,6 @@ Process 1 is the initialization process
  .Xr init 8 ,
  and is the ancestor of every other process in the system.
  It is used to control the process structure.
 -Process 2 is the paging daemon.
  .It Descriptor
  An integer assigned by the system when a file is referenced
  by
 
 
 -- 
 jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
 
 



uvm integer overflows

2014-05-05 Thread Mark Kettenis
Inspired by some commits in bitrig, I did an audit for potential
integer overflows caused by converting a page number into an
offset/size/address by shifting by PAGE_SHIFT.  While doing so, I
noticed that uvm_objwire/unwire really should use voff_t
instead of off_t.

There is one potential overflow left that this diff doesn't address.
In uvm_swap.c:uvm_swap_io() there is a line that reads:

bp->b_dirtyend = npages << PAGE_SHIFT;

Potentially this could overflow, but given that the b_dirtyend member
of struct buf is an int, there's not much we can do about this.

ok?


Index: uvm_aobj.c
===
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
retrieving revision 1.63
diff -u -p -r1.63 uvm_aobj.c
--- uvm_aobj.c  30 Apr 2014 19:25:14 -  1.63
+++ uvm_aobj.c  5 May 2014 18:14:19 -
@@ -422,7 +422,8 @@ uao_shrink_flush(struct uvm_object *uobj
 {
KASSERT(startpg  endpg);
KASSERT(uobj-uo_refs == 1);
-   uao_flush(uobj, startpg  PAGE_SHIFT, endpg  PAGE_SHIFT, PGO_FREE);
+   uao_flush(uobj, (voff_t)startpg  PAGE_SHIFT,
+   (voff_t)endpg  PAGE_SHIFT, PGO_FREE);
uao_dropswap_range(uobj, startpg, endpg);
 }
 
@@ -909,14 +910,14 @@ uao_flush(struct uvm_object *uobj, voff_
 
if (flags  PGO_ALLPAGES) {
start = 0;
-   stop = aobj-u_pages  PAGE_SHIFT;
+   stop = (voff_t)aobj-u_pages  PAGE_SHIFT;
} else {
start = trunc_page(start);
stop = round_page(stop);
-   if (stop  (aobj-u_pages  PAGE_SHIFT)) {
+   if (stop  ((voff_t)aobj-u_pages  PAGE_SHIFT)) {
printf(uao_flush: strange, got an out of range 
flush (fixed)\n);
-   stop = aobj-u_pages  PAGE_SHIFT;
+   stop = (voff_t)aobj-u_pages  PAGE_SHIFT;
}
}
 
@@ -1414,7 +1415,7 @@ uao_pagein_page(struct uvm_aobj *aobj, i
 
pg = NULL;
npages = 1;
-   rv = uao_get(aobj-u_obj, pageidx  PAGE_SHIFT,
+   rv = uao_get(aobj-u_obj, (voff_t)pageidx  PAGE_SHIFT,
 pg, npages, 0, VM_PROT_READ|VM_PROT_WRITE, 0, 0);
 
switch (rv) {
@@ -1511,7 +1512,7 @@ uao_dropswap_range(struct uvm_object *uo
int slot = elt-slots[j];
 
KASSERT(uvm_pagelookup(aobj-u_obj,
-   (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
+   
(voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
+ j)  PAGE_SHIFT) == NULL);
 
if (slot  0) {
Index: uvm_fault.c
===
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.72
diff -u -p -r1.72 uvm_fault.c
--- uvm_fault.c 13 Apr 2014 23:14:15 -  1.72
+++ uvm_fault.c 5 May 2014 18:14:19 -
@@ -622,7 +622,7 @@ ReFault:
/* wide fault (!narrow) */
nback = min(uvmadvice[ufi.entry-advice].nback,
(ufi.orig_rvaddr - ufi.entry-start)  PAGE_SHIFT);
-   startva = ufi.orig_rvaddr - (nback  PAGE_SHIFT);
+   startva = ufi.orig_rvaddr - ((vsize_t)nback  PAGE_SHIFT);
nforw = min(uvmadvice[ufi.entry-advice].nforw,
((ufi.entry-end - ufi.orig_rvaddr) 
 PAGE_SHIFT) - 1);
@@ -664,13 +664,13 @@ ReFault:
if (uobj) {
uoff = (startva - ufi.entry-start) + ufi.entry-offset;
(void) uobj-pgops-pgo_flush(uobj, uoff, uoff + 
-   (nback  PAGE_SHIFT), PGO_DEACTIVATE);
+   ((vsize_t)nback  PAGE_SHIFT), PGO_DEACTIVATE);
}
 
/* now forget about the backpages */
if (amap)
anons += nback;
-   startva += (nback  PAGE_SHIFT);
+   startva += ((vsize_t)nback  PAGE_SHIFT);
npages -= nback;
centeridx = 0;
}
Index: uvm_object.c
===
RCS file: /cvs/src/sys/uvm/uvm_object.c,v
retrieving revision 1.7
diff -u -p -r1.7 uvm_object.c
--- uvm_object.c30 May 2013 15:17:59 -  1.7
+++ uvm_object.c5 May 2014 18:14:19 -
@@ -64,12 +64,12 @@ uvm_objinit(struct uvm_object *uobj, str
  */
 
 int
-uvm_objwire(struct uvm_object *uobj, off_t start, off_t end,
+uvm_objwire(struct uvm_object *uobj, voff_t start, voff_t end,
 struct pglist *pageq)
 {
-   int i, npages, error;
+   int i, npages, left, error;
struct vm_page *pgs[FETCH_PAGECOUNT];
-   off_t offset = start, left;
+   voff_t offset = start;
 
left = (end - start)  

Re: sparc64: problem after trap table takeover under QEMU

2014-05-08 Thread Mark Kettenis
 Date: Thu, 08 May 2014 14:44:30 +0100
 From: Mark Cave-Ayland mark.cave-ayl...@ilande.co.uk
 
 On 06/05/14 19:18, Mark Cave-Ayland wrote:

Hi Mark,

Interesting to see sparc64 support in QEMU.  

  As soon as I step into address 0x1001804 then this is where things start
  to go wrong; the TLB (TTE) entry for 0x180 which is accessed by %sp
  is marked as privileged, but ASI 0x11 is user access only. QEMU's
  current behaviour for this is to generate a datafault for the page at
  0x180 which seems to get all the way through to the retry at the end
  of winfixsave, but then hits the breakpoint trap above when executing
  the retry.
 
 I've finally located the source of this bug thanks to more testing, 
 which showed that OpenBSD 4.9 was surprisingly also able to boot 
 (something I missed in my original bisection). This allowed me to 
 track down what was happening fairly easily. The problem is caused by 
 the fact that 0x180 has *two* mappings in the TLB and the way in 
 which QEMU resolves them.
 
 Compare the state of the TLB when the fill_0_normal trap occurs on 
 OpenBSD 5.5 (faults, incorrect) and OpenBSD 4.9 (no fault, correct):
 
 
 OpenBSD 5.5:
 
 (qemu) info tlb
 MMU contexts: Primary: 0, Secondary: 0
 DMMU dump
 ...
 [14] VA: 180, PA: f40,   4M, priv, RW, locked, ctx 0 local
 ...
 [42] VA: 180, PA: f40,   8k, user, RW, unlocked, ctx 0 local
 ...
 
 OpenBSD 4.9:
 
 (qemu) info tlb
 MMU contexts: Primary: 0, Secondary: 0
 DMMU dump
 ...
 [08] VA: 180, PA: f40,   8k, user, RW, unlocked, ctx 0 local
 ...
 [14] VA: 180, PA: f40,   4M, priv, RW, locked, ctx 0 local
 ...
 
 
 The bug occurs because the QEMU TLB algorithm currently searches the TLB 
 *in order* starting from entry 0 until it finds a VA match.
 
 In the OpenBSD 5.5 case, the first mapping it finds is the 4M privileged 
 mapping, and so the fill_0_normal trap which uses user ASI 0x11 faults 
 due to not being privileged. This is in contrast to the OpenBSD 4.9 case 
 where the first mapping it finds is the 8K unprivileged mapping, hence 
 the fill_0_normal trap succeeds and we proceed to boot.
 
 Does anyone know how real hardware resolves conflicts between multiple 
 TLB entries with the same VA? My guess would be that the smaller 8K 
 mapping should take priority, but the documentation in relation to 
 address aliasing is fairly non-existent so I am wondering if there are any 
 other rules relating to whether privileged mappings should take priority 
 or not? Once the behaviour is known, it will be fairly easy to fix up 
 QEMU to match.

I don't know how the real hardware behaves.  But it certainly is the
intention that the 4M locked mapping gets used as soon as we've
taken over the trap table.  Not sure where the 8K mapping is coming
from.

 Finally it does raise an eyebrow that the first window trap taken when 
 the kernel takes over the trap table is a fill_0_normal *user* trap, 
 particularly when it's against an *unlocked* TLB entry which could 
 potentially have been evicted beforehand. It might be worth 
 double-checking as to whether this is the intended behaviour or not.

Right.  It certainly isn't the intention that we end up in a
fill_0_normal trap at this point.  Perhaps %wstate is initialized
differently in QEMU than on real hardware?  The OpenBSD bootstrap code
does set %wstate appropriately immediately after taking over the trap
table.  We can't really do this earlier since we don't know the
conventions used by the spill and fill handlers provided by the
firmware.  But it looks like a Sun Fire T2000 actually initializes
%wstate to 0.

So perhaps we're just getting lucky on real hardware that the prom
code doesn't spill our trap frame and therefore we don't have to fill
it again.



Re: uchcom(4) did not work

2014-05-14 Thread Mark Kettenis
 Date: Wed, 14 May 2014 11:04:56 +0200
 From: Martin Pieuchot mpieuc...@nolizard.org
 
 On 13/05/14(Tue) 21:24, Mike Larkin wrote:
  On Wed, May 14, 2014 at 11:02:49AM +0900, SASANO Takayoshi wrote:
   Hi,
   
   The magic values are simply rewritten as #defines.
   If these values need to be disassembled, please take a while...
   
  
  I think we need to understand what those values mean. When I mentioned
  #defines, I meant something like:
  
  #define UCHCOM_SOME_FLAG 0x1234
  #define UCHCOM_SOME_OTHER_FLAG 0x5678
  ...
  ...
  #define UCHCOM_RESET_VALUE (UCHCOM_SOME_FLAG | UCHCOM_SOME_OTHER_FLAG)
  
  That way we know what the values do. If the value we're setting is not
  a flag, we should understand what 0x501F and 0xD90A actually mean.
 
 Unfortunately a lot of USB drivers are written without spec. and some
 values are taken by analysing the traffic generated by drivers on other
 OS.
 
  If we don't do it this way, the next person to come along and try to
  work on this code won't have any idea what to do.
 
 I also really don't like magic values, but I don't see how we could do
 it differently.  Maybe somebody can send an email to the author of the
 linux driver and ask him what these values are.  But I'd bet he doesn't
 know either.

And really, the old code had magic numbers as well.

   Index: uchcom.c
   ===
   RCS file: /cvs/src/sys/dev/usb/uchcom.c,v
   retrieving revision 1.19
   diff -u -p -r1.19 uchcom.c
   --- uchcom.c  15 Nov 2013 10:17:39 -  1.19
   +++ uchcom.c  14 May 2014 01:43:34 -
   @@ -91,18 +91,14 @@ int   uchcomdebug = 0;
#define UCHCOM_BRK1_MASK 0x01
#define UCHCOM_BRK2_MASK 0x40

   -#define UCHCOM_LCR1_MASK 0xAF
   -#define UCHCOM_LCR2_MASK 0x07
   -#define UCHCOM_LCR1_PARENB   0x80
   -#define UCHCOM_LCR2_PAREVEN  0x07
   -#define UCHCOM_LCR2_PARODD   0x06
   -#define UCHCOM_LCR2_PARMARK  0x05
   -#define UCHCOM_LCR2_PARSPACE 0x04
   -
#define UCHCOM_INTR_STAT10x02
#define UCHCOM_INTR_STAT20x03
#define UCHCOM_INTR_LEAST4

   +/* these values come from Linux (drivers/usb/serial/ch341.c) */
   +#define UCHCOM_RESET_VALUE   0x501F  /* XXX default line mode? */
   +#define UCHCOM_RESET_INDEX   0xD90A  /* XXX default baud rate? */
   +
#define UCHCOMIBUFSIZE 256
#define UCHCOMOBUFSIZE 256

   @@ -707,27 +703,10 @@ uchcom_set_dte_rate(struct uchcom_softc 
int
uchcom_set_line_control(struct uchcom_softc *sc, tcflag_t cflag)
{
   - usbd_status err;
   - uint8_t lcr1 = 0, lcr2 = 0;
   -
   - err = uchcom_read_reg(sc, UCHCOM_REG_LCR1, lcr1, UCHCOM_REG_LCR2,
   - lcr2);
   - if (err) {
   - printf(%s: cannot get LCR: %s\n,
   -sc-sc_dev.dv_xname, usbd_errstr(err));
   - return EIO;
   - }
   -
   - lcr1 = ~UCHCOM_LCR1_MASK;
   - lcr2 = ~UCHCOM_LCR2_MASK;
   -
 /*
  * XXX: it is difficult to handle the line control appropriately:
   -  *   - CS8, !CSTOPB and any parity mode seems ok, but
   -  *   - the chip doesn't have the function to calculate parity
   -  * in !CS8 mode.
   -  *   - it is unclear that the chip supports CS5,6 mode.
   -  *   - it is unclear how to handle stop bits.
   +  *   work as chip default - CS8, no parity, !CSTOPB
   +  *   other modes are not supported.
  */

 switch (ISSET(cflag, CSIZE)) {
   @@ -739,21 +718,8 @@ uchcom_set_line_control(struct uchcom_so
 break;
 }

   - if (ISSET(cflag, PARENB)) {
   - lcr1 |= UCHCOM_LCR1_PARENB;
   - if (ISSET(cflag, PARODD))
   - lcr2 |= UCHCOM_LCR2_PARODD;
   - else
   - lcr2 |= UCHCOM_LCR2_PAREVEN;
   - }
   -
   - err = uchcom_write_reg(sc, UCHCOM_REG_LCR1, lcr1, UCHCOM_REG_LCR2,
   - lcr2);
   - if (err) {
   - printf(%s: cannot set LCR: %s\n,
   -sc-sc_dev.dv_xname, usbd_errstr(err));
   - return EIO;
   - }
   + if (ISSET(cflag, PARENB) || ISSET(cflag, CSTOPB))
   + return EINVAL;

 return 0;
}
   @@ -778,38 +744,12 @@ int
uchcom_reset_chip(struct uchcom_softc *sc)
{
 usbd_status err;
   - uint8_t lcr1, lcr2, pre, div, mod;
   - uint16_t val=0, idx=0;
   -
   - err = uchcom_read_reg(sc, UCHCOM_REG_LCR1, lcr1, UCHCOM_REG_LCR2, 
   lcr2);
   - if (err)
   - goto failed;
   -
   - err = uchcom_read_reg(sc, UCHCOM_REG_BPS_PRE, pre, UCHCOM_REG_BPS_DIV,
   - div);
   - if (err)
   - goto failed;
   -
   - err = uchcom_read_reg(sc, UCHCOM_REG_BPS_MOD, mod, UCHCOM_REG_BPS_PAD,
   - NULL);
   - if (err)
   - goto failed;
   -
   - val |= (uint16_t)(lcr10xF0)  8;
   - val |= 0x01;
   - val |= (uint16_t)(lcr20x0F)  8;
   - val |= 0x02;
   - idx |= pre  0x07;
   - val |= 0x04;
   - idx |= (uint16_t)div  8;
   - val |= 0x08;
   - idx |= mod  0xF8;
   - val |= 0x10;

   - DPRINTF((%s: reset v=0x%04X, i=0x%04X\n,
 

Re: NOINET6 by default

2014-05-14 Thread Mark Kettenis
 Date: Wed, 14 May 2014 23:29:20 +0200
 From: Henning Brauer lists-openbsdt...@bsws.de

 this diff fixes that. well, really two independent parts.
 one: set the NOINET6 flag by default on each and every interface.

ok on that bit

 two: implement ifconfig <if> +inet6 to turn inet6 on and assign
 the link-local addr.

I don't think this is a good idea; didn't we establish the other day
that ifconfig <if> eui64 already did what your +inet6 does?



Re: got me a 16-core octeon donated.

2014-05-15 Thread Mark Kettenis
 Date: Thu, 15 May 2014 22:04:16 +0200
 From: Janne Johansson icepic...@gmail.com
 
 After some insight from jasper, I stripped away the randomdata section and
 voila:
 
 Copyright (c) 1982, 1986, 1989, 1991, 1993
 The Regents of the University of California.  All rights reserved.
 Copyright (c) 1995-2014 OpenBSD. All rights reserved.
 http://www.OpenBSD.org
 
 OpenBSD 5.5 (RAMDISK) #0: Fri Mar 14 12:10:56 CET 2014
 r...@octeon.office.jasper.la:/usr/src/sys/arch/octeon/compile/RAMDISK
 real mem = 4002201600 (3816MB)
 avail mem = 3970760704 (3786MB)
 warning: no entropy supplied by boot loader
 mainbus0 at root
 cpu0 at mainbus0: Unknown CPU type (0x0) rev 0.3 500 MHz, Software FP
 emulation
 cpu0: cache L1-I 32KB D 16KB 4 way, L2 128KB direct
 clock0 at mainbus0: int 5
 iobus0 at mainbus0
 octcf at iobus0 base 0x1d000800 irq 0 not configured
 pcibus0 at iobus0 irq 0
 pci0 at pcibus0 bus 0
 0:0:0: mem address conflict 0xf800/0x800
 0:2:0: bridge mem address conflict 0x1000/0x10
 vendor Cavium, unknown product 0x0005 (class processor subclass MIPS, rev
 0x03) at pci0 dev 0 function 0 not configured
 Pericom PI7C21P100 PCIX-PCIX rev 0x01 at pci0 dev 2 function 0 not
 configured

Looks like ppb(4) is missing from the kernel config.



Re: pckbd volume keys (part 1), diff to test

2014-05-26 Thread Mark Kettenis
 From: David Coppa dco...@gmail.com
 Date: Mon, 26 May 2014 13:23:21 +0200
 
 On Fri, May 23, 2014 at 12:42 PM, Alexandre Ratchov a...@caoua.org wrote:
  On Wed, Apr 30, 2014 at 01:06:48AM +0200, Alexandre Ratchov wrote:
  This diff attempts to unify volume keys; it makes pckbd and ukbd
  volume keys behave like all other volume keys (acpithinkpad,
  acpiasus, macppc/abtn and similar drivers): simply adjust the
  hardware volume without passing keystroke events to upper layers
  (i.e. consume the keystroke).
 
  If your volume keys tend to mess the volume while in X (example
  mplayer), try this diff and see if it makes things better (or
  worse).
 
  No test reports so far. To test this: start X, then:
 
  - press the vol - button many times (don't hold it pressed),
until volume goes to zero.
 
  - start a movie in mplayer, there's no sound as volume is zero.
 
  - press the vol + button and hold it down; now mplayer indicates
the volume reached the maximum. Still you don't hear anything.
 
  Confusing, isn't it? Then rebuild the kernel with this diff and
  retry. With the diff volume keys are simple and deterministic: they
  simply adjust the volume and don't trigger hot-keys or whatever.
 
 Tested on my laptop: it follows the principle of least astonishment,
 thus I like it.

But as I said before, the problem is that this breaks the visual
feedback feature in desktop environments and applications like Gnome.

We really need a discussion about the desired behaviour of the volume
keys that involves porters as well as a broader range of users.  It is
impossible to judge diffs like this one without having a clear picture
of the desired end result.

That said, I think that:

1. We need a kernel interface for toggling between the volume keys
   only directly manipulating the mixer and having them only generate
   events.  This doesn't necessarily have to be a user knob; it could
   be something that applications that want to see events and manage
   the sound volume themselves would flip.

2. Mixer control needs to be integrated with sndio, such that
   applications that elect to receive events can act upon them by
   using a consistent API.



Re: exp2(3) bug?

2014-06-02 Thread Mark Kettenis
 Date: Mon, 02 Jun 2014 09:34:20 +0200
 From: Benjamin Baier program...@netzbasis.de
 
 You might want to read up on floating point arithmetic. (rounding and 
 representation)

Well, the difference between 4.994404 and 5.0 is a bit large to blame
rounding and binary representation.  And other OpenBSD platforms
(amd64, sparc64, powerpc) return the expected result.  So I'd say that
there is a bug in the i386-specific implementation of exp2(3).

 On 06/02/14 05:13, Daniel Dickman wrote:
  I hit this problem while working with the numpy 1.8.1 regress suite
  which has some tests that are currently failing.
 
  Here is a reduced test case of the logaddexp2 python function which
  ends up calling exp2. Is this a bug in the openbsd exp2
  implementation?
 
  ---8---
  #include <stdio.h>
  #include <stdlib.h>
  #include <math.h>
 
  int main(void) {
   double x;
   double y;
 
   // x = log2(5)
   x = 2.32192809489;
   // y = 2**(log2(5))
   y = exp2(x);
 
   printf("expected: 5.0\n");
   printf("actual:   %f\n", y);
 
  ---8---
 
  on a linux/x86_64 machine:
 
  # gcc -lm test.c && ./a.out
  expected: 5.0
  actual:   5.00
 
  on an openbsd/i386 machine:
 
  # gcc -lm test.c && ./a.out
  expected: 5.0
  actual:   4.994404
 
 
 



Re: exp2(3) bug?

2014-06-02 Thread Mark Kettenis
 Date: Mon, 2 Jun 2014 10:17:53 +0200 (CEST)
 From: Mark Kettenis mark.kette...@xs4all.nl
 
  Date: Mon, 02 Jun 2014 09:34:20 +0200
  From: Benjamin Baier program...@netzbasis.de
  
  You might want to read up on floating point arithmetic. (rounding and 
  representation)
 
 Well, the difference between 4.994404 and 5.0 is a bit large to blame
 rounding and binary representation.  And other OpenBSD platforms
 (amd64, sparc64, powerpc) return the expected result.  So I'd say that
 there is a bug in the i386-specific implementation of exp2(3).

And here is a fix.  There actually isn't any i386-specific code, but
i386 is special and needs STRICT_ASSIGN() to work properly for double
as well as float.  FreeBSD made the same change a while ago:

http://svnweb.FreeBSD.org/base/head/lib/msun/src/math_private.h?revision=240827&view=markup

Haven't run the regression tests yet with this change.


Index: src/math_private.h
===
RCS file: /cvs/src/lib/libm/src/math_private.h,v
retrieving revision 1.16
diff -u -p -r1.16 math_private.h
--- src/math_private.h  12 Nov 2013 20:35:09 -  1.16
+++ src/math_private.h  2 Jun 2014 09:30:13 -
@@ -349,7 +349,7 @@ do {
\
 #defineSTRICT_ASSIGN(type, lval, rval) do {\
volatile type __lval;   \
\
-   if (sizeof(type) >= sizeof(double)) \
+   if (sizeof(type) >= sizeof(long double))\
(lval) = (rval);\
else {  \
__lval = (rval);\



Re: nextafterl(3) possible bug

2014-06-02 Thread Mark Kettenis
 Date: Mon, 2 Jun 2014 07:34:59 -0400
 From: Daniel Dickman didick...@gmail.com
 
 From the numpy test suite, I think I might have found a bug in
 nextafterl(3). The result_ld variable below comes back as nan on
 i386. But doing the same calculations with floats returns the expected
 values.
 
 A test on Linux also shows the expected results for both the float and
 long double cases.

Another bug.  Intel chose an extended precision format with an
explicit integer bit, and the code doesn't handle that.  Assuming we
don't support machines with extended precision format that have an
implicit integer bit, the following diff (an adaptation of the code in
glibc) should fix things.  Not entirely happy with the fix though, so
I'm still thinking about improvements.

Index: src/ld80/s_nextafterl.c
===
RCS file: /cvs/src/lib/libm/src/ld80/s_nextafterl.c,v
retrieving revision 1.4
diff -u -p -r1.4 s_nextafterl.c
--- src/ld80/s_nextafterl.c 12 Nov 2013 21:07:28 -  1.4
+++ src/ld80/s_nextafterl.c 2 Jun 2014 13:21:58 -
@@ -32,8 +32,8 @@ nextafterl(long double x, long double y)
ix = esx0x7fff;/* |x| */
iy = esy0x7fff;/* |y| */
 
-   if (((ix==0x7fff)((hx|lx)!=0)) ||   /* x is nan */
-   ((iy==0x7fff)((hy|ly)!=0))) /* y is nan */
+   if (((ix==0x7fff)((hx=0x7fff|lx)!=0)) ||   /* x is nan */
+   ((iy==0x7fff)((hy-0x7fff|ly)!=0))) /* y is nan */
   return x+y;
if(x==y) return y;  /* x=y, return y */
if((ix|hx|lx)==0) { /* x == 0 */
@@ -47,31 +47,50 @@ nextafterl(long double x, long double y)
if(ixiy||((ix==iy)  (hxhy||((hx==hy)(lxly) {
  /* x  y, x -= ulp */
if(lx==0) {
-   if (hx==0) esx -= 1;
-   hx -= 1;
+   if (hx = 0x8000) {
+ if (esx == 0) {
+   --hx;
+ } else {
+   esx -= 1;
+   hx = hx - 1;
+   if (esx  0)
+ hx |= 0x8000;
+ }
+   } else
+ hx -= 1;
}
lx -= 1;
} else {/* x  y, x += ulp */
lx += 1;
if(lx==0) {
hx += 1;
-   if (hx==0)
+   if (hx==0 || (esx == 0  hx == 0x8000)) {
esx += 1;
+   hx |= 0x8000;
+   }
}
}
} else {/* x  0 */
if(esy=0||(ixiy||((ix==iy)(hxhy||((hx==hy)(lxly)){
  /* x  y, x -= ulp */
if(lx==0) {
-   if (hx==0) esx -= 1;
-   hx -= 1;
+   if (hx = 0x8000) {
+   esx -= 1;
+   hx = hx - 1;
+   if ((esx0x7fff)  0)
+ hx |= 0x8000;
+   } else
+ hx -= 1;
}
lx -= 1;
} else {/* x  y, x += ulp */
lx += 1;
if(lx==0) {
hx += 1;
-   if (hx==0) esx += 1;
+   if (hx==0 || (esx == 0x8000  hx == 0x8000)) {
+   esx += 1;
+   hx |= 0x8000;
+   }
}
}
}



Re: nextafterl(3) possible bug

2014-06-04 Thread Mark Kettenis
 Date: Mon, 2 Jun 2014 21:18:26 -0400
 From: Daniel Dickman didick...@gmail.com
 
 
  Another bug.  Intel chose an extended precision format with an
  explicit integer bit, and the code doesn't handle that.  Assuming we
  don't support machines with extended precision format that have an
  implicit integer bit, the following diff (an adaptation of the code in
  glibc) should fix things.  Not entirely happy with the fix though, so
  I'm still thinking about improvements.
 
 confirming that this patch fixes the failing numpy regress test on i386.
 
 let me know if you want me to test a different diff.

Here's a better diff, inspired by what FreeBSD has.

ok?


Index: s_nextafterl.c
===
RCS file: /cvs/src/lib/libm/src/ld80/s_nextafterl.c,v
retrieving revision 1.4
diff -u -p -r1.4 s_nextafterl.c
--- s_nextafterl.c  12 Nov 2013 21:07:28 -  1.4
+++ s_nextafterl.c  4 Jun 2014 10:05:17 -
@@ -32,8 +32,8 @@ nextafterl(long double x, long double y)
ix = esx0x7fff;/* |x| */
iy = esy0x7fff;/* |y| */
 
-   if (((ix==0x7fff)((hx|lx)!=0)) ||   /* x is nan */
-   ((iy==0x7fff)((hy|ly)!=0))) /* y is nan */
+   if (((ix==0x7fff)((hx0x7fff|lx)!=0)) ||   /* x is nan */
+   ((iy==0x7fff)((hy0x7fff|ly)!=0))) /* y is nan */
   return x+y;
if(x==y) return y;  /* x=y, return y */
if((ix|hx|lx)==0) { /* x == 0 */
@@ -47,31 +47,30 @@ nextafterl(long double x, long double y)
if(ixiy||((ix==iy)  (hxhy||((hx==hy)(lxly) {
  /* x  y, x -= ulp */
if(lx==0) {
-   if (hx==0) esx -= 1;
-   hx -= 1;
+   if ((hx0x7fff)==0) esx -= 1;
+   hx = (hx - 1) | (hx  0x8000);
}
lx -= 1;
} else {/* x  y, x += ulp */
lx += 1;
if(lx==0) {
-   hx += 1;
-   if (hx==0)
-   esx += 1;
+   hx = (hx + 1) | (hx  0x8000);
+   if ((hx0x7fff)==0) esx += 1;
}
}
} else {/* x  0 */
if(esy=0||(ixiy||((ix==iy)(hxhy||((hx==hy)(lxly)){
  /* x  y, x -= ulp */
if(lx==0) {
-   if (hx==0) esx -= 1;
-   hx -= 1;
+   if ((hx0x7fff)==0) esx -= 1;
+   hx = (hx - 1) | (hx  0x8000);
}
lx -= 1;
} else {/* x  y, x += ulp */
lx += 1;
if(lx==0) {
-   hx += 1;
-   if (hx==0) esx += 1;
+   hx = (hx + 1) | (hx  0x8000);
+   if ((hx0x7fff)==0) esx += 1;
}
}
}



Re: mfi(4) vs WT and WB

2014-06-10 Thread Mark Kettenis
 Date: Tue, 10 Jun 2014 21:34:56 +0200
 From: Otto Moerbeek o...@drijf.net
 
 On Fri, Jun 06, 2014 at 08:54:24PM +0200, Otto Moerbeek wrote:
 
  These volumes feel pretty fast, so I suspect caching mode is OK. Still
  it is confusing to have a flag that doesn't reflect reality.
  
  I'm planning to upgrade the firmware next week. We'll see if that
  changes anything. BTW, all battery indicators are healthy, no learning
  cycle going on or something like that. 
  
 
 I just upgraded the perc firmware. After reboot, the volumes now
 report they are in WB mode. We'll have to wait and see if it stays
 that way. 

Which firmware version are you running now?



Re: mfi(4) vs WT and WB

2014-06-10 Thread Mark Kettenis
 Date: Tue, 10 Jun 2014 21:55:04 +0200
 From: Otto Moerbeek o...@drijf.net
 
 On Tue, Jun 10, 2014 at 09:52:23PM +0200, Mark Kettenis wrote:
 
   Date: Tue, 10 Jun 2014 21:34:56 +0200
   From: Otto Moerbeek o...@drijf.net
   
   On Fri, Jun 06, 2014 at 08:54:24PM +0200, Otto Moerbeek wrote:
   
These volumes feel pretty fast, so I suspect caching mode is OK. Still
it is confusing to have a flag that doesn't reflect reality.

I'm planning to upgrade the firmware next week. We'll see if that
changes anything. BTW, all battery indicators are healthy, no learning
cycle going on or something like that. 

   
   I just upgraded the perc firmware. After reboot, the volumes now
   report they are in WB mode. We'll have to wait and see if it stays
   that way. 
  
  Which firmware version are you running now?
 
 mfi0: PERC 6/i Integrated, firmware 6.3.3.0002, 256MB cache

And the release notes for that firmware still have the blurb about
RAID volumes always showing up as WT.  Quality engineering!



Re: acpiec(4): clear events based on vendor

2014-06-10 Thread Mark Kettenis
 Date: Tue, 10 Jun 2014 18:25:33 +0300
 From: Paul Irofti p...@irofti.net
 
 After discussions with Theo we decided to walk the table where needed
 instead of using the soft state variables.
 
 Also adding all the Samsung models to the quirks table (as per the
 Linux EC quirks table).

This diff breaks my Samsung NC10.  It provokes

  acpitz: critical temperature exceeded 144C, shutting down

messages during boot.  Below the dmesg from a standard -current kernel.


OpenBSD 5.5-current (GENERIC.MP) #0: Tue Jun 10 22:25:11 CEST 2014

kette...@albeniz.sibelius.xs4all.nl:/usr/src/sys/arch/i386/compile/GENERIC.MP
cpu0: Intel(R) Atom(TM) CPU N270 @ 1.60GHz (GenuineIntel 686-class) 1.60 GHz
cpu0: 
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,DTES64,MWAIT,DS-CPL,EST,TM2,SSSE3,xTPR,PDCM,MOVBE,LAHF,PERF
real mem  = 1063612416 (1014MB)
avail mem = 1033764864 (985MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: AT/286+ BIOS, date 11/25/08, BIOS32 rev. 0 @ 0xfd5f0, SMBIOS 
rev. 2.5 @ 0xdf010 (36 entries)
bios0: vendor Phoenix Technologies Ltd. version 03CA.MP00.20081125.KTW date 
11/25/2008
bios0: SAMSUNG ELECTRONICS CO., LTD. NC10
acpi0 at bios0: rev 2
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP APIC HPET MCFG TCPA TMOR APIC BOOT SLIC SSDT SSDT SSDT
acpi0: wakeup devices HDEF(S4) PXS1(S4) PXS2(S4) PXS3(S4) USB1(S3) USB2(S3) 
USB3(S3) USB4(S3) USB7(S3) SLT0(S4) SLT1(S4) SLT2(S4) SLT3(S4) SLT6(S4) 
LANC(S4) PWRB(S4)
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 132MHz
cpu0: mwait min=64, max=64, C-substates=0.2.2.0.2, IBE
cpu1 at mainbus0: apid 1 (application processor)
cpu1: Intel(R) Atom(TM) CPU N270 @ 1.60GHz (GenuineIntel 686-class) 1.60 GHz
cpu1: 
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,DTES64,MWAIT,DS-CPL,EST,TM2,SSSE3,xTPR,PDCM,MOVBE,LAHF,PERF
ioapic0 at mainbus0: apid 1 pa 0xfec0, version 20, 24 pins
ioapic0: misconfigured as apic 2, remapped to apid 1
acpihpet0 at acpi0: 14318179 Hz
acpimcfg0 at acpi0 addr 0xe000, bus 0-255
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus 2 (RP01)
acpiprt2 at acpi0: bus -1 (RP02)
acpiprt3 at acpi0: bus 3 (RP03)
acpiprt4 at acpi0: bus 4 (PCIB)
acpiec0 at acpi0
acpicpu0 at acpi0: C1, PSS
acpicpu1 at acpi0: C1, PSS
acpipwrres0 at acpi0: FN00, resource for FAN0
acpitz0 at acpi0: critical temperature is 98 degC
acpibat0 at acpi0: BAT1 type LION oem SAMSUNG Electronics
acpiac0 at acpi0: AC unit online
acpibtn0 at acpi0: LID0
acpibtn1 at acpi0: PWRB
acpibtn2 at acpi0: SLPB
acpivideo0 at acpi0: GFX0
bios0: ROM list: 0xc/0xec00! 0xdf000/0x1000! 0xe/0x1800!
cpu0: Enhanced SpeedStep 1596 MHz: speeds: 1600, 1333, 1067, 800 MHz
pci0 at mainbus0 bus 0: configuration mode 1 (bios)
pchb0 at pci0 dev 0 function 0 Intel 82945GME Host rev 0x03
vga1 at pci0 dev 2 function 0 Intel 82945GME Video rev 0x03
intagp0 at vga1
agp0 at intagp0: aperture at 0xd000, size 0x1000
inteldrm0 at vga1
drm0 at inteldrm0
inteldrm0: 1024x600
wsdisplay0 at vga1 mux 1: console (std, vt100 emulation)
wsdisplay0: screen 1-5 added (std, vt100 emulation)
Intel 82945GM Video rev 0x03 at pci0 dev 2 function 1 not configured
azalia0 at pci0 dev 27 function 0 Intel 82801GB HD Audio rev 0x02: msi
azalia0: codecs: Realtek ALC272
audio0 at azalia0
ppb0 at pci0 dev 28 function 0 Intel 82801GB PCIE rev 0x02: apic 1 int 17
pci1 at ppb0 bus 2
ath0 at pci1 dev 0 function 0 Atheros AR5424 rev 0x01: apic 1 int 16
ath0: AR5424 14.2 phy 7.0 rf 0.0, EU1W, address 00:24:2b:1d:6b:25
ppb1 at pci0 dev 28 function 2 Intel 82801GB PCIE rev 0x02: apic 1 int 18
pci2 at ppb1 bus 3
mskc0 at pci2 dev 0 function 0 Marvell Yukon 88E8040 rev 0x13, Yukon-2 FE+ 
rev. A0 (0x0): apic 1 int 18
msk0 at mskc0 port A: address 00:13:77:b3:54:f0
eephy0 at msk0 phy 0: 88E3016 10/100 PHY, rev. 0
uhci0 at pci0 dev 29 function 0 Intel 82801GB USB rev 0x02: apic 1 int 23
uhci1 at pci0 dev 29 function 1 Intel 82801GB USB rev 0x02: apic 1 int 19
uhci2 at pci0 dev 29 function 2 Intel 82801GB USB rev 0x02: apic 1 int 18
uhci3 at pci0 dev 29 function 3 Intel 82801GB USB rev 0x02: apic 1 int 16
ehci0 at pci0 dev 29 function 7 Intel 82801GB USB rev 0x02: apic 1 int 23
usb0 at ehci0: USB revision 2.0
uhub0 at usb0 Intel EHCI root hub rev 2.00/1.00 addr 1
ppb2 at pci0 dev 30 function 0 Intel 82801BAM Hub-to-PCI rev 0xe2
pci3 at ppb2 bus 4
ichpcib0 at pci0 dev 31 function 0 Intel 82801GBM LPC rev 0x02: PM disabled
pciide0 at pci0 dev 31 function 2 Intel 82801GBM SATA rev 0x02: DMA, channel 
0 wired to compatibility, channel 1 wired to compatibility
wd0 at pciide0 channel 0 drive 0: FUJITSU MHZ2160BH G2
wd0: 16-sector PIO, LBA48, 152627MB, 312581808 sectors
wd0(pciide0:0:0): 

Re: acpiec(4): clear events based on vendor

2014-06-11 Thread Mark Kettenis
 Date: Wed, 11 Jun 2014 08:40:51 +0200
 From: Remi Locherer remi.loche...@relo.ch
 
 On Wed, Jun 11, 2014 at 09:11:54AM +0300, Paul Irofti wrote:
  On Tue, Jun 10, 2014 at 11:50:02PM +0200, Remi Locherer wrote:
   On Tue, Jun 10, 2014 at 06:25:33PM +0300, Paul Irofti wrote:
After discussions with Theo we decided to walk the table where needed
instead of using the soft state variables.

Also adding all the Samsung models to the quirks table (as per the
Linux EC quirks table).

   
   I tried this diff with my Samsung notebook. With sysctl hw.sensors or
   apm the state of the power supply is displayed correctly. If I change the
   status (disconnect or connect again) this is then also shown correctly.
  
  So this time it works... Did you apply the diff on top of a current sys?
 
 I did a cvs up on June 10 and applied this diff on top of that. 
 
  
   But a current kernel (checkout from June 10) with this patch applied does
   not show the acpibat0 sensor values correctly.
  
  And this time it does not?
 
 With this diff hw.sensors.acpiac0.indicator0 works correctly but 
 hw.sensors.acpibat0.amphourX does not. With snapshot kernels from June 6
 and June 10 it's the other way round.
 
  
  I'm confused :-)
 
 I can imagine - the complexity of acpi combined with Samsung's implementation
 and my imprecise description ... ;-)

Our acpi code does something wrong.  This seems to be the root cause
of the acpitz(4) problems that we're seeing on a wider variety of
hardware.  I really think we should try to fix that broader issue
before trying to fix this more specific suspend/resume issue on
Samsung hardware.



Re: We can dump(8) more than 2TB

2014-06-12 Thread Mark Kettenis
 Date: Thu, 12 Jun 2014 15:14:29 -0400
 From: Ted Unangst t...@tedunangst.com
 
 On Thu, Jun 12, 2014 at 21:07, Christian Weisgerber wrote:
  After writing 2TB (INT_MAX * TP_BSIZE), dump(8) stops reporting
  progress because the blockswritten variable has wrapped around to
  negative.  It needs to be a larger type like the tapesize variable;
  see optr.c:timeest().  This only affects the terminal chatter.  The
  actual dump functionality is fine.
 
  int notify; /* notify operator flag */
  -intblockswritten;  /* number of blocks written on current tape */
  +off_t  blockswritten;  /* number of blocks written on current tape */
  int tapeno; /* current tape number */
  time_t  tstart_writing; /* when started writing the first tape block */
  longxferrate;   /* averaged transfer rate of all volumes */
 
 I'm not sure off_t is the right semantic type. Perhaps just use int64_t?

int64_t is what we use in struct stat.

That said, there is existing off_t abuse in dump.



Re: [PATCH] Atheros AR9281 miniPCI-E new product id 2nd try

2014-06-18 Thread Mark Kettenis
 Date: Wed, 18 Jun 2014 10:20:05 +0200
 From: Stefan Sperling s...@openbsd.org
 
 On Wed, Jun 18, 2014 at 09:05:56AM +0200, Remi Locherer wrote:
  I had my athn card working fine in my APU board with -current amd64.
  But then after a reboot athn was not there anymore. The dmesg showed that
  it had the id 0xff1c. After the next reboot it attached again normally
  (dmesg below from the working state).
  
  While searching for the ID 0xff1c I found something in the openwrt ticket
  system but could not track it down to an actual commit to linux.
  https://dev.openwrt.org/ticket/9991
 
 Adrian Chadd explains the problem as follows.
 In short: If you only see this problem with APU boards, please talk
 to pcengines.

So it seems fairly obvious that the APU BIOS isn't properly POSTing
the wireless card under some circumstances.  The fact that it doesn't
happen upon a cold boot, and doesn't happen upon a warm boot if
the OpenBSD driver didn't attach, suggests that there is a dependence
on the state in which our driver leaves the hardware upon reboot.



Re: Unnecessary mmap flags?

2014-06-27 Thread Mark Kettenis
 Date: Thu, 26 Jun 2014 17:01:23 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 On Thu, Jun 26, 2014 at 12:28:18PM -0700, Matthew Dempsky wrote:
  I just reviewed our mmap(2) flags to compare them against Linux,
  FreeBSD, Solaris, and Darwin's flags.  Of the flags listed below, none
  of them are specified by POSIX, and none of them do anything
  interesting on OpenBSD: MAP_COPY just gets rewritten to MAP_PRIVATE,
  and the rest are silently ignored by UVM.
 
 Feedback so far is the useless flags should go away.  Diff below is a
 first step towards this:
 
 1. MAP_COPY is redefined as an alias for MAP_PRIVATE, and the other
 useless MAP_* flags are redefined to 0.  They're also hidden from the
 kernel to make sure no kernel code accidentally depends on them still.
 
 2. Adds COMPAT_O55_MAP_COPY so we can stay binary compatible with any
 OpenBSD 5.5 programs that still use MAP_COPY (probably none, but it's
 trivial to do), and COMPAT_O55_MAP_NOOPMASK just to keep track of
 which bits were previously reserved for do-nothing flags.
 
 3. Reshuffles the defines a little bit so the order makes more sense.
 
 Followup patch will add a deprecation warning for the MAP_* flags, but
 I think that'll need some ports testing, whereas this should be safe
 to commit now.
 
 ok?

Losing the descriptions of the no-op flags is a bit unfortunate.
Can you add those back?

 
 Index: sys/mman.h
 ===
 RCS file: /home/matthew/cvs-mirror/cvs/src/sys/sys/mman.h,v
 retrieving revision 1.24
 diff -u -p -r1.24 mman.h
 --- sys/mman.h13 Jun 2014 01:48:52 -  1.24
 +++ sys/mman.h26 Jun 2014 23:54:28 -
 @@ -50,32 +50,43 @@
   */
  #define  MAP_SHARED  0x0001  /* share changes */
  #define  MAP_PRIVATE 0x0002  /* changes are private */
 -#define  MAP_COPY0x0004  /* copy region at mmap time */
 +
 +/*
 + * Mapping type
 + */
 +#define  MAP_FILE0x  /* map from file (default) */
 +#define  MAP_ANON0x1000  /* allocated from memory, swap space */
  
  /*
   * Other flags
   */
 -#define  MAP_FIXED0x0010 /* map addr must be exactly as 
 requested */
 -#define  MAP_RENAME   0x0020 /* Sun: rename private pages to file */
 -#define  MAP_NORESERVE0x0040 /* Sun: don't reserve needed swap area 
 */
 -#define  MAP_INHERIT  0x0080 /* region is retained after exec */
 -#define  MAP_NOEXTEND 0x0100 /* for MAP_FILE, don't change file size 
 */
 -#define  MAP_HASSEMAPHORE 0x0200 /* region may contain semaphores */
 -#define  MAP_TRYFIXED 0x0400 /* attempt hint address, even within 
 heap */
 +#define  MAP_FIXED   0x0010  /* map addr must be exactly as 
 requested */
 +#define  __MAP_NOREPLACE 0x0800  /* fail if address not available */
  
 -#define  __MAP_NOREPLACE  0x0800 /* fail if address not available */
 +#ifdef _KERNEL
 +#define COMPAT_O55_MAP_COPY  0x0004  /* alias for MAP_PRIVATE */
 +#define COMPAT_O55_MAP_NOOPMASK  0x07e0  /* formerly reserved flag bits 
 */
 +#endif
 +
 +#define  MAP_FLAGMASK0x1ff7
  
 +#ifndef _KERNEL
  /*
 - * Error return from mmap()
 + * Deprecated flags with no significant meaning on OpenBSD.
   */
 -#define MAP_FAILED   ((void *)-1)
 +#define  MAP_COPYMAP_PRIVATE
 +#define  MAP_HASSEMAPHORE0
 +#define  MAP_INHERIT 0
 +#define  MAP_NOEXTEND0
 +#define  MAP_NORESERVE   0
 +#define  MAP_RENAME  0
 +#define  MAP_TRYFIXED0
 +#endif
  
  /*
 - * Mapping type
 + * Error return from mmap()
   */
 -#define  MAP_FILE0x  /* map from file (default) */
 -#define  MAP_ANON0x1000  /* allocated from memory, swap space */
 -#define  MAP_FLAGMASK0x1ff7
 +#define MAP_FAILED   ((void *)-1)
  
  /*
   * POSIX memory advisory values.
 Index: uvm/uvm_mmap.c
 ===
 RCS file: /home/matthew/cvs-mirror/cvs/src/sys/uvm/uvm_mmap.c,v
 retrieving revision 1.94
 diff -u -p -r1.94 uvm_mmap.c
 --- uvm/uvm_mmap.c13 Apr 2014 23:14:15 -  1.94
 +++ uvm/uvm_mmap.c26 Jun 2014 23:49:39 -
 @@ -345,8 +345,8 @@ sys_mmap(struct proc *p, void *v, regist
   return (EINVAL);
   if ((flags  MAP_FLAGMASK) != flags)
   return (EINVAL);
 - if (flags  MAP_COPY)
 - flags = (flags  ~MAP_COPY) | MAP_PRIVATE;
 + if (flags  COMPAT_O55_MAP_COPY)
 + flags = (flags  ~COMPAT_O55_MAP_COPY) | MAP_PRIVATE;
   if ((flags  (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
   return (EINVAL);
   if ((flags  (MAP_FIXED|__MAP_NOREPLACE)) == __MAP_NOREPLACE)
 
 



Re: Unnecessary mmap flags?

2014-06-27 Thread Mark Kettenis
 Date: Thu, 26 Jun 2014 12:28:18 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 I just reviewed our mmap(2) flags to compare them against Linux,
 FreeBSD, Solaris, and Darwin's flags.  Of the flags listed below, none
 of them are specified by POSIX, and none of them do anything
 interesting on OpenBSD: MAP_COPY just gets rewritten to MAP_PRIVATE,
 and the rest are silently ignored by UVM.
 
   Linux   FreeBSD Solaris Darwin
 MAP_COPY  no  YES*no  YES*
 MAP_RENAMEno  YES*YES*YES*
 MAP_NORESERVE YES YES*YES YES*
 MAP_INHERIT   no  YES**   no  no
 MAP_NOEXTEND  no  no  no  YES*
 MAP_HASSEMAPHORE  no  YES***  no  YES***
 MAP_TRYFIXED  no  no  no  no

MAP_TRYFIXED is a NetBSD'ism.  The others are inherited straight from 4.4BSD.

 So MAP_NORESERVE is perhaps necessary/worth keeping around, but the
 others seem like candidates for removal if nothing in ports needs
 them.

I think that MAP_NORESERVE should indeed be kept.

 MAP_HASSEMAPHORE is used in rthread_sem.c, but it doesn't do anything,
 so I suspect it's just cargo culting based on man page misinformation?
 Are there architectures that actually have restrictions on semaphore
 memory?

There are architectures where atomic instructions only work on pages with
certain caching attributes.  Those attributes tend to be the default
attributes though, so there is not much value in retaining this flag.



Re: POSIX-compliant page fault error codes

2014-06-29 Thread Mark Kettenis
 Date: Tue, 24 Jun 2014 15:53:20 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 On Tue, Jun 24, 2014 at 11:04:10AM -0700, Matthew Dempsky wrote:
SIGBUS/BUS_ADRERR: Accessing a mapped page that exceeds the end of
the underlying mapped file.
 
 Generating SIGBUS for this case has proven controversial due to
 concern that this is Linux invented behavior and not compatible with
 Solaris, so I decided to collect some more background information on
 the subject.
 
 - SunOS 4.1.3's mmap() manual specifies: Any reference to addresses
 beyond the end of the object, however, will result in the delivery of
 a SIGBUS signal. This wording was relaxed to SIGBUS or SIGSEGV in
 SunOS 5.6 and remains in current manuals. (I'm not sure, but I suspect
 this may be to simply reflect that memory protection violations take
 priority over bounds checking.)

It makes sense that memory protection violations take priority over
bounds checking.

   SunOS 4.1.3: 
 http://www.freebsd.org/cgi/man.cgi?query=mmap&sektion=2&manpath=SunOS+4.1.3
   SunOS 5.6: 
 http://www.freebsd.org/cgi/man.cgi?query=mmap&sektion=2&manpath=SunOS+5.6
   Solaris 11: http://docs.oracle.com/cd/E23824_01/html/821-1463/mmap-2.html
 
 - Many other SVR-derived OSes similarly document SIGBUS in their
 mmap() manuals too:
 
   AIX: 
 http://www-01.ibm.com/support/knowledgecenter/ssw_aix_53/com.ibm.aix.basetechref/doc/basetrf1/mmap.htm?lang=en
   HPUX: 
 http://h20566.www2.hp.com/portal/site/hpsc/template.BINARYPORTLET/public/kb/docDisplay/resource.process/?spf_p.tpst=kbDocDisplay_ws_BIspf_p.rid_kbDocDisplay=docDisplayResURLjavax.portlet.begCacheTok=com.vignette.cachetokenspf_p.rst_kbDocDisplay=wsrp-resourceState%3DdocId%253Demr_na-c02261243-2%257CdocLocale%253Djavax.portlet.endCacheTok=com.vignette.cachetoken
   UnixWare: http://uw714doc.sco.com/en/man/html.2/mmap.2.html
 
 - This behavior has been (awkwardly) specified for mmap() since SUSv2:
 References within the address range starting at pa and continuing for
 len bytes to whole pages following the end of an object shall result
 in delivery of a SIGBUS signal. Later versions of POSIX have the same
 wording.
 
   SUSv2: http://pubs.opengroup.org/onlinepubs/007908799/xsh/mmap.html
   POSIX.2001: 
 http://pubs.opengroup.org/onlinepubs/009695399/functions/mmap.html
   POSIX.2008: 
 http://pubs.opengroup.org/onlinepubs/9699919799/functions/mmap.html
 
 - More generally, POSIX explains the SIGBUS/SIGSEGV distinction
 thusly: When an object is mapped, various application accesses to the
 mapped region may result in signals. In this context, SIGBUS is used
 to indicate an error using the mapped object, and SIGSEGV is used to
 indicate a protection violation or misuse of an address. Specific
 examples are provided too:
 
   Memory Protection: 
 http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_08_03_03


Generating SIGBUS for access beyond the end of an object makes some
sense.  In this case there is a valid mapping; it's just that the
underlying physical memory pages aren't there.  It is not dissimilar
to having mapped a physical address that maps to say the PCI bus.  On
real hardware accessing such a mapping will lead to a failed bus
transaction for which the logical representation is a SIGBUS.  (On
PeeCee hardware you'll probably get back an all-ones bit-pattern).
From a hardware-oriented perspective, SIGSEGV is generated by the MMU
and SIGBUS is generated by the underlying hardware.

So I don't think the Sun engineers made a totally unreasonable
decision here.  Unfortunately the CSRG made a different decision when
they reimplemented mmap support in 4.3BSD-Reno.  Or perhaps things got
broken after that...

In my view, generating SIGBUS under these circumstances is a bit
unfortunate.  Currently, SIGBUS on OpenBSD is a very clear indication
of an alignment issue.  If we would generate SIGBUS for access beyond
the end of a mmap'ed object this would no longer be the case.  We'd
actually have to look at the siginfo, which isn't printed by the shell.

On the other hand, passing memory objects by fd is getting more
common.  Xorg recently modernized its shared memory interface
(MIT-SHM, aka XShm) to support mmap'ing file descriptors passed over
sockets.  And DRM is moving in the same direction to solve security
issues with access to graphics objects.  But this approach has a
downside.  A malicious client could pass an fd to the X server and
subsequently truncate it after the X server mapped it.  If the X
server accesses this mapping, it will crash.  To prevent this from
happening, the X server will install a signal handler for SIGBUS,
check if a shared memory object is being accessed and patch things up
(by mmap'ing anonymous memory on top of the mapping).  This code can
be extended of course by handling SIGSEGV as well.  But this means
more work in xenocara and ports, and we might miss some places where
this needs to be done.

Theo has some worries that changing SIGSEGV to SIGBUS in this 

Re: Rename MAP_ANON to MAP_ANONYMOUS

2014-06-30 Thread Mark Kettenis
 Date: Mon, 30 Jun 2014 10:53:00 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 On Mon, Jun 30, 2014 at 10:42 AM, Mark Kettenis mark.kette...@xs4all.nl 
 wrote:
  Solaris documents MAP_ANON in its man page, and defines MAP_ANONYMOUS
  as MAP_ANON for source compatibility.
 
 Yep, but what about it?  Are you suggesting that should affect POSIX's
 standardization, or that we should do the same thing?  I suspect if
 POSIX standardizes MAP_ANONYMOUS, that Solaris would switch to
 documenting MAP_ANONYMOUS and providing MAP_ANON for source compat,
 no?

Yes, I'm saying that this should affect POSIX's standardization.
Solaris is where mmap(2) came from.

Also, look at:

  https://github.com/sgminer-dev/sgminer/blob/master/m4/mmap-anon.m4

which contains the following comment:

# Detect how mmap can be used to create anonymous (not file-backed) memory
# mappings.
# - On Linux, AIX, OSF/1, Solaris, Cygwin, Interix, Haiku, both MAP_ANONYMOUS
# and MAP_ANON exist and have the same value.
# - On HP-UX, only MAP_ANONYMOUS exists.
# - On MacOS X, FreeBSD, NetBSD, OpenBSD, only MAP_ANON exists.
# - On IRIX, neither exists, and a file descriptor opened to /dev/zero must be
# used.

This suggests that there is actually a clear majority of systems where
only MAP_ANON exists.  OK, UnixWare isn't listed here, and perhaps it
doesn't provide MAP_ANON for compatibility.  But frankly, UnixWare is
long dead.  Perhaps a few more systems in the "only MAP_ANON exists"
category have moved up to the "both MAP_ANONYMOUS and MAP_ANON
exist" category.  But MacOS X 10.6 is still firmly in the "only
MAP_ANON exists" category.  And I'd say MacOS X has a much larger
installed base than HP-UX.

Regardless of what POSIX decides, I think we should add MAP_ANONYMOUS,
but stay true to our heritage and keep MAP_ANON as the primary #define
and in the man page.



Re: anoncvs errors

2014-07-04 Thread Mark Kettenis
 Date: Fri, 04 Jul 2014 04:31:35 -0500
 From: Vijay Sankar vsan...@foretell.ca
 
 obsd55$ pkg_info | grep cvs
 cvsync-0.25.0pre0   CVS repository synchronization utility

You need cvsync-0.25.0pre0p0



Re: Mark get*[ug]id() as NOLOCK

2014-07-07 Thread Mark Kettenis
 Date: Mon, 7 Jul 2014 11:18:53 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 Recently guenther changed user credentials to be a per-process
 resource, but kept a per-thread cache of credentials that get
 refreshed at each system call entry.  All of the get*[ug]id() system
 calls simply access the thread cached credentials, and possibly
 copyout() them if necessary, so they're safe to mark as NOLOCK.
 
 ok?

Makes sense to me.  But let's give guenther@ a chance to comment.

 Index: syscalls.master
 ===
 RCS file: /cvs/src/sys/kern/syscalls.master,v
 retrieving revision 1.141
 diff -u -p -r1.141 syscalls.master
 --- syscalls.master   6 Jul 2014 20:55:58 -   1.141
 +++ syscalls.master   7 Jul 2014 18:06:34 -
 @@ -80,8 +80,8 @@
   int flags, void *data); }
  22   STD { int sys_unmount(const char *path, int flags); }
  23   STD { int sys_setuid(uid_t uid); }
 -24   STD { uid_t sys_getuid(void); }
 -25   STD { uid_t sys_geteuid(void); }
 +24   STD NOLOCK  { uid_t sys_getuid(void); }
 +25   STD NOLOCK  { uid_t sys_geteuid(void); }
  #ifdef PTRACE
  26   STD { int sys_ptrace(int req, pid_t pid, caddr_t addr, \
   int data); }
 @@ -112,7 +112,7 @@
  41   STD { int sys_dup(int fd); }
  42   STD { int sys_fstatat(int fd, const char *path, \
   struct stat *buf, int flag); }
 -43   STD { gid_t sys_getegid(void); }
 +43   STD NOLOCK  { gid_t sys_getegid(void); }
  44   STD { int sys_profil(caddr_t samples, size_t size, \
   u_long offset, u_int scale); }
  #ifdef KTRACE
 @@ -124,7 +124,7 @@
  46   STD { int sys_sigaction(int signum, \
   const struct sigaction *nsa, \
   struct sigaction *osa); }
 -47   STD { gid_t sys_getgid(void); }
 +47   STD NOLOCK  { gid_t sys_getgid(void); }
  48   STD { int sys_sigprocmask(int how, sigset_t mask); }
  49   STD { int sys_getlogin(char *namebuf, u_int namelen); }
  50   STD { int sys_setlogin(const char *namebuf); }
 @@ -181,7 +181,7 @@
   const struct timeval *tptr); }
  78   STD { int sys_mincore(void *addr, size_t len, \
   char *vec); }
 -79   STD { int sys_getgroups(int gidsetsize, \
 +79   STD NOLOCK  { int sys_getgroups(int gidsetsize, \
   gid_t *gidset); }
  80   STD { int sys_setgroups(int gidsetsize, \
   const gid_t *gidset); }
 @@ -476,11 +476,11 @@
  278  UNIMPL  sys_extattr_set_fd
  279  UNIMPL  sys_extattr_get_fd
  280  UNIMPL  sys_extattr_delete_fd
 -281  STD { int sys_getresuid(uid_t *ruid, uid_t *euid, \
 +281  STD NOLOCK  { int sys_getresuid(uid_t *ruid, uid_t *euid, \
   uid_t *suid); }
  282  STD { int sys_setresuid(uid_t ruid, uid_t euid, \
   uid_t suid); }
 -283  STD { int sys_getresgid(gid_t *rgid, gid_t *egid, \
 +283  STD NOLOCK  { int sys_getresgid(gid_t *rgid, gid_t *egid, \
   gid_t *sgid); }
  284  STD { int sys_setresgid(gid_t rgid, gid_t egid, \
   gid_t sgid); }
 
 



Re: Mark get*[ug]id() as NOLOCK

2014-07-07 Thread Mark Kettenis
 Date: Mon, 07 Jul 2014 15:06:47 -0400
 From: Ted Unangst t...@tedunangst.com
 
 On Mon, Jul 07, 2014 at 11:18, Matthew Dempsky wrote:
  Recently guenther changed user credentials to be a per-process
  resource, but kept a per-thread cache of credentials that get
  refreshed at each system call entry.  All of the get*[ug]id() system
  calls simply access the thread cached credentials, and possibly
  copyout() them if necessary, so they're safe to mark as NOLOCK.
 
 Is copyout safe to call with no lock?

yes



Re: Use atomics for mutating p_sigmask

2014-07-08 Thread Mark Kettenis
 Date: Mon, 7 Jul 2014 13:46:03 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 p_sigmask is only modified by the owning thread from process context
 (e.g., sys_sigprocmask(), sys_sigreturn(), userret(), or postsig()),
 but it can be accessed anywhere (e.g., interrupts or threads on other
 CPUs).  Currently sys_sigprocmask() protects p_sigmask with splhigh(),
 but that's not MP-safe.
 
 The simpler solution is to take advantage of our atomics APIs.
 Unfortunately for implementing SIG_SETMASK, we don't have an
 atomic_store_int() function, and I can't bring myself to abuse
 volatile for this purpose, so I've resorted to atomic_swap_uint().

Sorry, but I think that's wrong.  You need volatile to make sure the
mask is read from memory when you're checking bits.  Or you need to
insert the proper memory barriers I think.



Re: Compile kernel with -std=gnu99

2014-07-08 Thread Mark Kettenis
 Date: Tue, 8 Jul 2014 11:17:35 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 Diff below converts the kernel to build with -std=gnu99.  (For
 simplicity, I've only included amd64 for now, but I'll make the same
 change to all kernel Makefiles if this is ok.)
 
 The only incompatibility (that I'm aware of) is that ISO C99's inline
 semantics differ slightly from GNU C89's historical (but non-standard)
 inline semantics, but I believe the diff below keeps us consistent
 with the semantics the kernel currently assumes.  (More details
 below.)
 
 I've tested on amd64 and I get the exact same .o files with or without
 this change (except vers.o, but only because of timestamping).  It's
 probably worth conducting the same test on one of our GCC 3
 architectures.
 
 ok?

I disagree with this diff.  We should discourage the use of GNU
extensions in our kernel.  Therefore I think std=gnu99 would give the
wrong signal.

As for the inline issue.  IMHO, given the incompatibility problems
between ISO C and GNU C, only static inline is usable.  The
semantics of the other inline forms is just too confusing.  The
occasional extra copy of the code is as far as I'm concerned
acceptable.  The functions should be small enough for it not to
matter.



Re: Use atomics for mutating p_sigmask

2014-07-08 Thread Mark Kettenis
 Date: Tue, 8 Jul 2014 09:05:38 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 On Tue, Jul 8, 2014 at 1:29 AM, Mark Kettenis mark.kette...@xs4all.nl wrote:
  Date: Mon, 7 Jul 2014 13:46:03 -0700
  From: Matthew Dempsky matt...@dempsky.org
 
  p_sigmask is only modified by the owning thread from process context
  (e.g., sys_sigprocmask(), sys_sigreturn(), userret(), or postsig()),
  but it can be accessed anywhere (e.g., interrupts or threads on other
  CPUs).  Currently sys_sigprocmask() protects p_sigmask with splhigh(),
  but that's not MP-safe.
 
  The simpler solution is to take advantage of our atomics APIs.
  Unfortunately for implementing SIG_SETMASK, we don't have an
  atomic_store_int() function, and I can't bring myself to abuse
  volatile for this purpose, so I've resorted to atomic_swap_uint().
 
  Sorry, but I think that's wrong.  You need volatile to make sure the
  mask is read from memory when you're checking bits.  Or you need to
  insert the proper memory barriers I think.
 
 To be clear: I meant I don't want to abuse volatile as though it's a
 magic make-these-operations-**atomic** flag, as that's not what it
 really means (even if in practice it often has that effect).
 
 Also, as long as the (always current thread) mutators and cross-thread
 accessors are still serialized by the kernel lock, marking p_sigmask
 as volatile shouldn't be necessary.  However, I do agree that once we
 start unlocking any of them (which is a future goal of this work), we
 need some sort of atomic guarantee on the read side too, and marking
 p_sigmask as volatile seems like a reasonable first step.  (I'd
 probably go further and also decorate the accesses with C11-style
 atomic_load()s.)
 
 So I'm happy to mark p_sigmask as volatile with this diff if you'd
 prefer.  I just don't think it's strictly necessary yet, but I'm not
 opposed to it either.

Even if the kernel lock protects us now, I think not adding volatile
would cause nasty surprises in the future.



Re: Compile kernel with -std=gnu99

2014-07-08 Thread Mark Kettenis
 
 On Tue, Jul 8, 2014 at 12:03 PM, Mark Kettenis mark.kette...@xs4all.nl 
 wrote:
  I disagree with this diff.  We should discourage the use of GNU
  extensions in our kernel.  Therefore I think std=gnu99 would give the
  wrong signal.
 
 Can you clarify your concern?  Currently we're (implicitly) compiling
 with -std=gnu89, which is ISO C90 + GNU extensions.  This diff changes
 us to (explicitly) compile with -std=gnu99 -fgnu89-inline, which is
 ISO C99 + GNU extensions + GNU C89 inline.
 
 I think moving from C90 to C99 is a good idea.
 
 I'm indifferent to GNU extensions.  If we could make do without them,
 then great; but technically inline asm is a GNU extension (even if
 it's available in C99 mode) and I doubt we'll benefit from eliminating
 that.
 
 Using GNU89 inline is an intermediary step, because the kernel isn't
 ready for C99 inline.  See below.
 
  As for the inline issue.  IMHO, given the incompatibility problems
  between ISO C and GNU C, only static inline is usable.  The
  semantics of the other inline forms is just too confusing.  The
  occasional extra copy of the code is as far as I'm concerned
  acceptable.  The functions should be small enough for it not to
  matter.
 
 Converting the existing inline functions to be C99 compatible (either
 by adding static or removing inline) is a next step I have planned,
 but I want to allow other C99 features first.
 
 Also, there are inline functions in MD code, so I'd rather have all
 kernels on -std=gnu99 -fgnu89-inline.  Then we can start cleaning up
 GNU89 inlines and remove -fgnu89-inline on arches once they're clean.
 E.g., first clean up all MI and x86 inlines, then the x86 kernels can
 start compiling without -fgnu89-inline, which will make sure we don't
 regress in MI code while we start tackling other MD files.

With that explanation, this sounds a lot more reasonable.



Re: Paravirtualized optimizations for KVM

2014-07-08 Thread Mark Kettenis
 Date: Tue, 8 Jul 2014 09:22:41 +0200 (CEST)
 From: Stefan Fritsch s...@sfritsch.de
 
 Hi,
 
 I have been trying to increase fork performance of openbsd/amd64 on KVM. 
 It turns out that when I increase the number of CPUs of a VM from 1 to 3, 
 a fork+exit micro benchmark is slowed down by a factor of 7.
 
 The main reason for this seems to be a very large number of cross-CPU TLB 
 flushes (about 4 per fork+exit). Each IPI causes several VM exits which 
 are expensive. To reduce this, I have been trying to use paravirtualized 
 interfaces provided by KVM and optimize some other things. These changes 
 are mostly activated by a new pseudo device paravirt (which has the 
 advantage that one can use UKC to tweak things without recompiling). 
 However, some changes will remain if not running on a hypervisor (or 
 paravirt is disabled). For example, x86_ipi() will use a pointer to 
 dispatch to the appropriate implementation.
 
 Is this the way to go forward? Or would you rather prefer a compile time 
 option and maybe ship a bsd.mp.paravirt kernel in addition to bsd+bsd.mp?

Are these paravirtualization APIs stable?  Are they (properly)
documented somewhere?

If we're serious about supporting OpenBSD on (KVM) hypervisors,
something like this makes sense.  We tend to try and have a single
kernel that runs on the widest range of hardware that is possible.
For example the OpenBSD/sparc64 kernel runs on both sun4u and sun4v
hardware, and the sun4v platform has paravirtualization written all
over it.  There I successfully made use of code patching techniques.
That might help on x86 as well.

Can't say I'm happy with making the interrupt handling code even more
complicated though...



Re: Add MAP_ANONYMOUS as a synonym for MAP_ANON

2014-07-10 Thread Mark Kettenis
 Date: Thu, 10 Jul 2014 11:50:18 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 The Austin Group this morning accepted proposed wording to standardize
 both MAP_ANON and MAP_ANONYMOUS, recognizing that neither was clearly
 more popular than the other among applications, and that there's
 precedent in POSIX for simply standardizing multiple spellings for a
 feature when both are common and trivial for implementations to
 provide (e.g., PAGE_SIZE and PAGESIZE).
 
 Diff below defines MAP_ANONYMOUS as a synonym for MAP_ANON, keeping
 MAP_ANON as canonical in following BSD heritage.
 
 ok?

ok kettenis@

 Index: lib/libc/sys/mmap.2
 ===
 RCS file: /cvs/src/lib/libc/sys/mmap.2,v
 retrieving revision 1.50
 diff -u -p -r1.50 mmap.2
 --- lib/libc/sys/mmap.2   2 Jul 2014 22:22:35 -   1.50
 +++ lib/libc/sys/mmap.2   10 Jul 2014 18:32:59 -
 @@ -121,7 +121,7 @@ Sharing, mapping type, and options are s
  .Fa flags
  argument by OR'ing the following values.
  Exactly one of the first two values must be specified:
 -.Bl -tag -width MAP_PRIVATE -offset indent
 +.Bl -tag -width MAP_ANONYMOUS -offset indent
  .It Dv MAP_PRIVATE
  Modifications are private.
  .It Dv MAP_SHARED
 @@ -129,13 +129,16 @@ Modifications are shared.
  .El
  .Pp
  Any combination of the following flags may additionally be used:
 -.Bl -tag -width MAP_PRIVATE -offset indent
 +.Bl -tag -width MAP_ANONYMOUS -offset indent
  .It Dv MAP_ANON
  Map anonymous memory not associated with any specific file.
  The file descriptor used for creating
  .Dv MAP_ANON
  must currently be \-1 indicating no name is associated with the
  region.
 +.It Dv MAP_ANONYMOUS
 +Synonym for
 +.Dv MAP_ANON .
  .It Dv MAP_FIXED
  Demand that the mapping is placed at
  .Fa addr ,
 @@ -157,7 +160,7 @@ source compatibility with code written f
  but are not recommended for use in new
  .Ox
  code:
 -.Bl -tag -width MAP_PRIVATE -offset indent
 +.Bl -tag -width MAP_ANONYMOUS -offset indent
  .It Dv MAP_COPY
  Modifications are private and, unlike
  .Dv MAP_PRIVATE ,
 Index: sys/sys/mman.h
 ===
 RCS file: /cvs/src/sys/sys/mman.h,v
 retrieving revision 1.25
 diff -u -p -r1.25 mman.h
 --- sys/sys/mman.h27 Jun 2014 20:50:43 -  1.25
 +++ sys/sys/mman.h10 Jul 2014 18:32:59 -
 @@ -57,6 +57,7 @@
  #define  MAP_FIXED   0x0010  /* map addr must be exactly as 
 requested */
  #define  __MAP_NOREPLACE 0x0800  /* fail if address not available */
  #define  MAP_ANON0x1000  /* allocated from memory, swap space */
 +#define  MAP_ANONYMOUS   MAP_ANON/* alternate POSIX spelling */
  
  #define  MAP_FLAGMASK0x1ff7
  
 
 



apmd -A induced hangs

2014-07-13 Thread Mark Kettenis
Some people have reported that apmd -A makes their machines hang.
Could those people try the diff below and see whether it helps?

Index: acpicpu.c
===
RCS file: /home/cvs/src/sys/dev/acpi/acpicpu.c,v
retrieving revision 1.60
diff -u -p -r1.60 acpicpu.c
--- acpicpu.c   12 Jul 2014 18:48:17 -  1.60
+++ acpicpu.c   13 Jul 2014 14:00:03 -
@@ -202,9 +202,7 @@ acpicpu_set_pdc(struct acpicpu_softc *sc
static uint8_t cpu_oscuuid[16] = { 0x16, 0xA6, 0x77, 0x40, 0x0C, 0x29,
   0xBE, 0x47, 0x9E, 0xBD, 0xD8, 0x70,
   0x58, 0x71, 0x39, 0x53 };
-   cap = ACPI_PDC_C_C1_HALT | ACPI_PDC_P_FFH | ACPI_PDC_C_C1_FFH
-   | ACPI_PDC_C_C2C3_FFH | ACPI_PDC_SMP_P_SWCOORD | ACPI_PDC_SMP_C2C3
-   | ACPI_PDC_SMP_C1PT;
+   cap = ACPI_PDC_P_FFH | ACPI_PDC_C_C1_FFH;
 
if (aml_searchname(sc-sc_devnode, _OSC)) {
/* Query _OSC */



Re: Possible bug in cpu_chooseproc?

2014-07-13 Thread Mark Kettenis
 Date: Sun, 13 Jul 2014 13:12:46 -0700
 From: Matthew Dempsky matt...@dempsky.org
 
 As the name suggests, remrunqueue(p) removes p from its run queue, and
 I believe that makes TAILQ_FOREACH() here unsafe.  Instead of actually
 removing all threads from the processor, we'll only remove the first
 from each of its run queues.
 
 Diff below replaces TAILQ_FOREACH with the safe/idiomatic pattern for
 draining a queue.
 
 ok?

Ugh, yes.

 Index: kern_sched.c
 ===
 RCS file: /cvs/src/sys/kern/kern_sched.c,v
 retrieving revision 1.32
 diff -u -p -r1.32 kern_sched.c
 --- kern_sched.c  4 May 2014 05:03:26 -   1.32
 +++ kern_sched.c  13 Jul 2014 20:18:38 -
 @@ -272,7 +272,7 @@ sched_chooseproc(void)
   if (spc-spc_schedflags  SPCF_SHOULDHALT) {
   if (spc-spc_whichqs) {
   for (queue = 0; queue  SCHED_NQS; queue++) {
 - TAILQ_FOREACH(p, spc-spc_qs[queue], p_runq) {
 + while ((p = TAILQ_FIRST(spc-spc_qs[queue]))) {
   remrunqueue(p);
   p-p_cpu = sched_choosecpu(p);
   setrunqueue(p);
 
 



Re: Add mpbios to RAMDISK_CD?

2014-07-15 Thread Mark Kettenis
 Date: Tue, 15 Jul 2014 12:38:34 +0200
 From: Christian Weisgerber na...@mips.inka.de
 
 The reason the Soekris net6501 has hw.ncpufound=1 in bsd.rd is
 stupid.  The net6501 has MP BIOS, but not ACPI.  Only the GENERIC
 (thus also GENERIC.MP) and RAMDISK kernels on amd64 and i386 include
 mpbios(4), RAMDISK_CD doesn't.
 
 Is there any reason we don't have mpbios(4) in RAMDISK_CD?  Are
 there space constraints to consider?  It looks like an accidental
 omission to me.

Must be an omission.  And space shouldn't be an issue as (floppy)
RAMDISK kernels have it.

 make release on amd64 works with this and the resulting bsd.rd has
 hw.ncpufound=2 on the net6501.  I haven't tested i386.

Seems it is missing from i386 RAMDISK_CD as well, so you should add it
there too.

 Index: arch/amd64/conf/RAMDISK_CD
 ===
 RCS file: /cvs/src/sys/arch/amd64/conf/RAMDISK_CD,v
 retrieving revision 1.141
 diff -u -p -r1.141 RAMDISK_CD
 --- arch/amd64/conf/RAMDISK_CD12 Jul 2014 21:56:56 -  1.141
 +++ arch/amd64/conf/RAMDISK_CD14 Jul 2014 22:37:38 -
 @@ -48,6 +48,8 @@ acpiprt*at acpi?
  acpimadt0at acpi?
  #acpitz* at acpi?
  
 +mpbios0  at bios0
 +
  cpu0 at mainbus0
  ioapic*  at mainbus?
  isa0 at mainbus0



Re: Add mpbios to RAMDISK_CD?

2014-07-15 Thread Mark Kettenis
 Date: Tue, 15 Jul 2014 13:31:55 +0200
 From: Christian Weisgerber na...@mips.inka.de
 
 Ted Unangst:
 
   Is there any reason we don't have mpbios(4) in RAMDISK_CD?  Are
   there space constraints to consider?  It looks like an accidental
   omission to me.
  
  I think there is some concern that the mpbios is wrong on many
  machines with acpi?
 
 RAMDISK_CD also has
 
 acpi0   at bios?
 acpiprt*at acpi?
 acpimadt0   at acpi?
 
 Is this insufficient to override mpbios?

acpi overrides mpbios if it attaches



Re: DNS control port additions to /etc/services

2014-07-15 Thread Mark Kettenis
 Date: Tue, 15 Jul 2014 17:17:45 +0200
 From: Antoine Jacoutot ajacou...@bsdfrog.org
 
 But be careful, this is not a user-editable file anymore, so we need
 to take into account that some stuffs that may not appear obvious to
 us may still be needed by people.

That's a mistake.  You're supposed to be able to add ports in there
for custom software such that you can use getservbyname(3) and don't
have to hardcode the port number in your code and be sure that
something else doesn't camp out on that port because of port
randomization.



Re: LibreSSL portable 2.0.2 released.

2014-07-16 Thread Mark Kettenis
 Date: Wed, 16 Jul 2014 11:03:12 +0200
 From: Martin Hecht he...@hlrs.de
 
 On 07/16/2014 05:40 AM, Bob Beck wrote:
  We have release an update, LibreSSL 2.0.2
 
  This release addresses the Linux forking and pid wrap issue reported 
  recently in
  the press.
 
  As noted before, we welcome feedback from the broader community.
 
  Enjoy
 
  -Bob
 
 Hi,
 
 with 2.0.2 on Xubuntu 12.04  I get the following compile error:
 
 make[1]: Entering directory `/home/user/tmp/libressl-2.0.2/apps'
   CCLD openssl
 ../crypto/.libs/libcrypto.so: undefined reference to `getauxval'
 collect2: ld returned 1 exit status
 make[1]: *** [openssl] Error 1
 make[1]: Leaving directory `/home/user/tmp/libressl-2.0.2/apps'
 make: *** [all-recursive] Error 1
 
 I ran ./configure --prefix=/usr/local/stow/libressl-2.0.2 before.
 
 2.0.1 was compiling fine and working in a first test.

Diff below fixes that issue.

Index: getentropy_linux.c
===
RCS file: /cvs/src/lib/libcrypto/crypto/getentropy_linux.c,v
retrieving revision 1.24
diff -u -p -r1.24 getentropy_linux.c
--- getentropy_linux.c  13 Jul 2014 13:37:38 -  1.24
+++ getentropy_linux.c  16 Jul 2014 09:13:23 -
@@ -486,6 +486,7 @@ getentropy_fallback(void *buf, size_t le
 
HD(cnt);
}
+#ifdef HAVE_AUXVAL
 #ifdef AT_RANDOM
/* Not as random as you think but we take what we are given */
p = (char *) getauxval(AT_RANDOM);
@@ -501,6 +502,7 @@ getentropy_fallback(void *buf, size_t le
p = (char *) getauxval(AT_BASE);
if (p)
HD(p);
+#endif
 #endif
 
SHA512_Final(results, ctx);



Re: LibreSSL portable 2.0.2 released.

2014-07-16 Thread Mark Kettenis
 From: Bob Beck b...@openbsd.org
 Date: Wed, 16 Jul 2014 07:55:16 -0600
 
 please commit that mark

committed to cvs (with HAVE_GETAUXVAL instead of HAVE_AUXVAL)

guess one of you can do the magic to get this into the git repo?

 On Wed, Jul 16, 2014 at 3:14 AM, Mark Kettenis mark.kette...@xs4all.nl 
 wrote:
  Date: Wed, 16 Jul 2014 11:03:12 +0200
  From: Martin Hecht he...@hlrs.de
 
  On 07/16/2014 05:40 AM, Bob Beck wrote:
   We have release an update, LibreSSL 2.0.2
  
   This release addresses the Linux forking and pid wrap issue reported 
   recently in
   the press.
  
   As noted before, we welcome feedback from the broader community.
  
   Enjoy
  
   -Bob
  
  Hi,
 
  with 2.0.2 on Xubuntu 12.04  I get the following compile error:
 
  make[1]: Entering directory `/home/user/tmp/libressl-2.0.2/apps'
CCLD openssl
  ../crypto/.libs/libcrypto.so: undefined reference to `getauxval'
  collect2: ld returned 1 exit status
  make[1]: *** [openssl] Error 1
  make[1]: Leaving directory `/home/user/tmp/libressl-2.0.2/apps'
  make: *** [all-recursive] Error 1
 
  I ran ./configure --prefix=/usr/local/stow/libressl-2.0.2 before.
 
  2.0.1 was compiling fine and working in a first test.
 
  Diff below fixes that issue.
 
  Index: getentropy_linux.c
  ===
  RCS file: /cvs/src/lib/libcrypto/crypto/getentropy_linux.c,v
  retrieving revision 1.24
  diff -u -p -r1.24 getentropy_linux.c
  --- getentropy_linux.c  13 Jul 2014 13:37:38 -  1.24
  +++ getentropy_linux.c  16 Jul 2014 09:13:23 -
  @@ -486,6 +486,7 @@ getentropy_fallback(void *buf, size_t le
 
  HD(cnt);
  }
  +#ifdef HAVE_AUXVAL
   #ifdef AT_RANDOM
  /* Not as random as you think but we take what we are given 
  */
  p = (char *) getauxval(AT_RANDOM);
  @@ -501,6 +502,7 @@ getentropy_fallback(void *buf, size_t le
  p = (char *) getauxval(AT_BASE);
  if (p)
  HD(p);
  +#endif
   #endif
 
  SHA512_Final(results, ctx);
 



Re: PATCH: further kernel malloc - mallocarray

2014-07-16 Thread Mark Kettenis
 From: Theo de Raadt dera...@cvs.openbsd.org
 Date: Wed, 16 Jul 2014 08:18:34 -0600
 
 I would really really prefer if we can keep these as const*const
 conversions instead of const, const.

Indeed, conversion to mallocarray only makes sense if one of the
multiplication operands is a variable.



Re: PATCH: fix regress skip slow

2014-07-19 Thread Mark Kettenis
 Date: Sat, 19 Jul 2014 18:16:56 +
 From: Miod Vallat m...@online.fr
 
  REGRESS_SKIP_SLOW didn't work as intended.  Default is no yet it checks
  for !empty.
 
 Good catch! Applied, thanks.
 
  Added t-exhaust as slow because it takes  90 minutes on my recent CPU.
 
 That's odd. This test only takes a few seconds on an amd64 system. Do
 you have any local changes to libc, or any particular /etc/malloc.conf
 settings, which could explain the slowdown?

Perhaps Doug is running this with ulimit -d unlimited?

If that's the case, perhaps the test should set a limit?



Re: PATCH: overflow behavior in malloc(9)

2014-07-22 Thread Mark Kettenis
 Date: Tue, 22 Jul 2014 21:21:39 +
 From: Doug Hogan d...@acyclic.org
 
 On Tue, Jul 22, 2014 at 02:51:17AM -0400, Jean-Philippe Ouellet wrote:
  That is misleading in the M_CANFAIL case.
  
  I'm not terribly good at wording things, but I suggest something
  more like this instead:
 
 Hmm I think it's only misleading in the M_CANFAIL case.  I think this
 diff makes it a little more complex than it needs to be.  What do you
 think about leaving the malloc option section as-is and instead
 explain how mallocarray() operates before it calls malloc()?
 
 Something along these lines: mallocarray(9) is a wrapper around
 malloc(9) that checks for overflow.  If arithmetic overflow is detected,
 it returns NULL when M_CANFAIL is enabled or else calls panic().
 Otherwise, it has the same behavior as malloc.
 
 Does that work?

Hmm, I believe, quite strongly, that we should always panic when an
arithmetic overflow is detected.

The M_CANFAIL flag is really there to allow for failure in certain
low-memory conditions, not to recover from programming bugs.



suspend-related scheduler fix

2014-07-26 Thread Mark Kettenis
Thanks to the evil Intel AMT serial, I've been able to figure out what
made my x220 hang when suspending.  Turns out that the fix matthew@
committed almost two weeks ago uncovered another bug that made
sched_stop_secondary_cpus() spin forever if there was a process
running on a secondary cpu but nothing left on the run queue.  In
that case sched_choosecpu() short-circuits and returns the current
cpu.  The fix is obvious: don't short-circuit when we're on a cpu that
should stop.

ok?


Index: kern_sched.c
===
RCS file: /home/cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.33
diff -u -p -r1.33 kern_sched.c
--- kern_sched.c13 Jul 2014 21:44:58 -  1.33
+++ kern_sched.c26 Jul 2014 11:44:26 -
@@ -275,6 +275,7 @@ sched_chooseproc(void)
while ((p = TAILQ_FIRST(spc-spc_qs[queue]))) {
remrunqueue(p);
p-p_cpu = sched_choosecpu(p);
+   KASSERT(p-p_cpu != curcpu());
setrunqueue(p);
}
}
@@ -408,6 +409,7 @@ sched_choosecpu(struct proc *p)
 */
if (cpuset_isset(set, p-p_cpu) ||
(p-p_cpu == curcpu()  p-p_cpu-ci_schedstate.spc_nrun == 0 
+   (p-p_cpu-ci_schedstate.spc_schedflags  SPCF_SHOULDHALT) == 0 
curproc == p)) {
sched_wasidle++;
return (p-p_cpu);



Re: finally nuke sys/dkstat.h

2014-09-06 Thread Mark Kettenis
 Date: Sat, 6 Sep 2014 10:04:08 +
 From: Miod Vallat m...@online.fr
 
 sys/dkstat.h has not contained disk statistics for 17 years. The
 remaining defines from this file can already be found in sys/sched.h,
 and the variable declarations would better be in sys/tty.h.
 
 The following diff thus gets rid of this file and adjusts userland to
 use sys/sched.h when applicable.

In principle ok kettenis@, but there might be some ports that attempt
to include sys/dkstat.h.

 Index: libexec/rpc.rstatd/rstat_proc.c
 ===
 RCS file: /cvs/src/libexec/rpc.rstatd/rstat_proc.c,v
 retrieving revision 1.31
 diff -u -p -r1.31 rstat_proc.c
 --- libexec/rpc.rstatd/rstat_proc.c   8 Jul 2014 17:19:23 -   1.31
 +++ libexec/rpc.rstatd/rstat_proc.c   6 Sep 2014 09:59:17 -
 @@ -36,7 +36,7 @@
   */
  
  #include sys/param.h
 -#include sys/dkstat.h
 +#include sys/sched.h
  #include sys/socket.h
  #include sys/sysctl.h
  #include net/if.h
 Index: sbin/sysctl/sysctl.c
 ===
 RCS file: /cvs/src/sbin/sysctl/sysctl.c,v
 retrieving revision 1.203
 diff -u -p -r1.203 sysctl.c
 --- sbin/sysctl/sysctl.c  16 Aug 2014 21:39:16 -  1.203
 +++ sbin/sysctl/sysctl.c  6 Sep 2014 09:59:18 -
 @@ -38,10 +38,10 @@
  #include sys/sysctl.h
  #include sys/socket.h
  #include sys/malloc.h
 -#include sys/dkstat.h
  #include sys/uio.h
  #include sys/tty.h
  #include sys/namei.h
 +#include sys/sched.h
  #include sys/sensors.h
  #include sys/vmmeter.h
  #include net/route.h
 Index: sys/arch/hppa/hppa/autoconf.c
 ===
 RCS file: /cvs/src/sys/arch/hppa/hppa/autoconf.c,v
 retrieving revision 1.60
 diff -u -p -r1.60 autoconf.c
 --- sys/arch/hppa/hppa/autoconf.c 4 Sep 2014 19:01:02 -   1.60
 +++ sys/arch/hppa/hppa/autoconf.c 6 Sep 2014 09:59:18 -
 @@ -76,7 +76,6 @@ void (*cold_hook)(int); /* see below */
   * LED blinking thing
   */
  #ifdef USELEDS
 -#include sys/dkstat.h
  #include sys/kernel.h
  
  struct timeout heartbeat_tmo;
 Index: sys/arch/hppa64/hppa64/autoconf.c
 ===
 RCS file: /cvs/src/sys/arch/hppa64/hppa64/autoconf.c,v
 retrieving revision 1.20
 diff -u -p -r1.20 autoconf.c
 --- sys/arch/hppa64/hppa64/autoconf.c 4 Sep 2014 19:01:02 -   1.20
 +++ sys/arch/hppa64/hppa64/autoconf.c 6 Sep 2014 09:59:18 -
 @@ -77,7 +77,6 @@ void (*cold_hook)(int); /* see below */
   * LED blinking thing
   */
  #ifdef USELEDS
 -#include sys/dkstat.h
  #include sys/kernel.h
  
  struct timeout heartbeat_tmo;
 Index: sys/kern/kern_clock.c
 ===
 RCS file: /cvs/src/sys/kern/kern_clock.c,v
 retrieving revision 1.86
 diff -u -p -r1.86 kern_clock.c
 --- sys/kern/kern_clock.c 4 Sep 2014 19:14:47 -   1.86
 +++ sys/kern/kern_clock.c 6 Sep 2014 09:59:18 -
 @@ -39,7 +39,6 @@
  
  #include sys/param.h
  #include sys/systm.h
 -#include sys/dkstat.h
  #include sys/timeout.h
  #include sys/kernel.h
  #include sys/limits.h
 Index: sys/kern/kern_sysctl.c
 ===
 RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
 retrieving revision 1.263
 diff -u -p -r1.263 kern_sysctl.c
 --- sys/kern/kern_sysctl.c4 Sep 2014 19:14:47 -   1.263
 +++ sys/kern/kern_sysctl.c6 Sep 2014 09:59:19 -
 @@ -58,7 +58,6 @@
  #include sys/disk.h
  #include sys/sysctl.h
  #include sys/msgbuf.h
 -#include sys/dkstat.h
  #include sys/vmmeter.h
  #include sys/namei.h
  #include sys/exec.h
 @@ -74,7 +73,7 @@
  #include sys/evcount.h
  #include sys/un.h
  #include sys/unpcb.h
 -
 +#include sys/sched.h
  #include sys/mount.h
  #include sys/syscallargs.h
  
 Index: sys/kern/tty.c
 ===
 RCS file: /cvs/src/sys/kern/tty.c,v
 retrieving revision 1.113
 diff -u -p -r1.113 tty.c
 --- sys/kern/tty.c13 Jul 2014 15:29:04 -  1.113
 +++ sys/kern/tty.c6 Sep 2014 09:59:19 -
 @@ -46,7 +46,6 @@
  #undef   TTYDEFCHARS
  #include sys/file.h
  #include sys/conf.h
 -#include sys/dkstat.h
  #include sys/uio.h
  #include sys/kernel.h
  #include sys/vnode.h
 Index: sys/net/if_sl.c
 ===
 RCS file: /cvs/src/sys/net/if_sl.c,v
 retrieving revision 1.53
 diff -u -p -r1.53 if_sl.c
 --- sys/net/if_sl.c   22 Jul 2014 11:06:09 -  1.53
 +++ sys/net/if_sl.c   6 Sep 2014 09:59:20 -
 @@ -64,7 +64,6 @@
  
  #include sys/param.h
  #include sys/mbuf.h
 -#include sys/dkstat.h
  #include sys/socket.h
  #include sys/ioctl.h
  #include sys/file.h
 Index: sys/net/ppp_tty.c
 ===
 RCS file: /cvs/src/sys/net/ppp_tty.c,v
 retrieving revision 1.28
 diff -u -p -r1.28 ppp_tty.c
 

Clear PME Status when a wake event is received

2014-09-06 Thread Mark Kettenis
As pea@ noticed, Apple machines with an NVIDIA MCP79 chipset will
suffer an interrupt storm when you send them a WOL packet to their
nfe(4) interface.  The acpi0 interrupt handler fires continuously in
this case.  Some digging revealed that the device wake GPE for the
nfe(4) interface was causing this.  The machine's AML sends a
notification, suggesting that the device driver for the device needs
to take some action.  But ours doesn't.

Some more digging revealed that the PCIe PME status bit gets set.
Clearing that bit stops the interrupts and makes the machine happy
again.  

So here is a diff that installs a notification handler for all PCI
devices that clears the PME status bit if the device has one.  This
seems to be what Windows does.  At least I found some Microsoft
documentation that says that the PCI driver is responsible for
clearing the PME status bit if a PME event is received.

ok?


Index: acpi.c
===
RCS file: /cvs/src/sys/dev/acpi/acpi.c,v
retrieving revision 1.267
diff -u -p -r1.267 acpi.c
--- acpi.c  20 Jul 2014 18:05:21 -  1.267
+++ acpi.c  6 Sep 2014 18:54:38 -
@@ -74,6 +74,7 @@ int   acpi_hasprocfvs;
 void   acpi_pci_match(struct device *, struct pci_attach_args *);
 pcireg_t acpi_pci_min_powerstate(pci_chipset_tag_t, pcitag_t);
 voidacpi_pci_set_powerstate(pci_chipset_tag_t, pcitag_t, int, int);
+intacpi_pci_notify(struct aml_node *, int, void *);
 
 intacpi_match(struct device *, void *, void *);
 void   acpi_attach(struct device *, struct device *, void *);
@@ -567,6 +568,8 @@ acpi_pci_match(struct device *dev, struc
state = pci_get_powerstate(pa-pa_pc, pa-pa_tag);
acpi_pci_set_powerstate(pa-pa_pc, pa-pa_tag, state, 1);
acpi_pci_set_powerstate(pa-pa_pc, pa-pa_tag, state, 0);
+
+   aml_register_notify(pdev-node, NULL, acpi_pci_notify, pdev, 0);
}
 }
 
@@ -660,6 +663,29 @@ acpi_pci_set_powerstate(pci_chipset_tag_
 
}
 #endif /* NACPIPWRRES  0 */
+}
+
+int
+acpi_pci_notify(struct aml_node *node, int ntype, void *arg)
+{
+   struct acpi_pci *pdev = arg;
+   pci_chipset_tag_t pc = NULL;
+   pcitag_t tag;
+   pcireg_t reg;
+   int offset;
+
+   /* We're only interested in Device Wake notifications. */
+   if (ntype != 2)
+   return (0);
+
+   tag = pci_make_tag(pc, pdev-bus, pdev-dev, pdev-fun);
+   if (pci_get_capability(pc, tag, PCI_CAP_PWRMGMT, offset, 0)) {
+   /* Clear the PME Status bit if it is set. */
+   reg = pci_conf_read(pc, tag, offset + PCI_PMCSR);
+   pci_conf_write(pc, tag, offset + PCI_PMCSR, reg);
+   }
+
+   return (0);
 }
 
 void



FC disks

2014-09-08 Thread Mark Kettenis
Hi All,

My supply of Fibre Channel disks is getting thin, If somebody has some
they're willing to donate to the project and ship to The Netherlands,
please contact me.

Thanks,

Mark



Re: Clear PME Status when a wake event is received

2014-09-08 Thread Mark Kettenis
 
 On Sat, Sep 06, 2014 at 09:18:26PM +0200, Mark Kettenis wrote:
  As pea@ noticed, Apple machines with an NVIDIA MCP79 chipset will
  suffer an interrupt storm when you send them a WOL packet to their
  nfe(4) interface.  The acpi0 interrupt handler fires continuously in
  this case.  Some digging revealed that the device wake GPE for the
  nfe(4) interface was causing this.  The machine's AML sends a
  notification, suggesting that the device driver for the device needs
  to take some action.  But ours doesn't.
  
  Some more digging revealed that the PCIe PME status bit gets set.
  Clearing that bit stops the interrupts and makes the machine happy
  again.  
  
  So here is a diff that installs a notification handler for all PCI
  devices that clears the PME status bit if the device has one.  This
  seems to be what Windows does.  At least I found some Microsoft
  documentation that says that the PCI driver is responsible for
  clearing the PME status bit if a PME event is received.
  
  ok?
 
 Just a nit, there's a dozen or so notify types in the spec, should we
 replace '2' with something more understandable like ACPI_PME_EVENT or
 similar? That suggestion should probably apply to the others we look
 at in other places like 0x81 / 0x82, etc. Maybe that's better left
 for another cleanup diff?
 
 Other than that, diff looks ok to me.

Yeah, some #defines for those magic constants would be good.  We
actually have some, but their names are dock-specific.  So this will
need some additional cleanup.  I'll see what I can do.



apmd hangs

2014-09-08 Thread Mark Kettenis
The more code and documentation I read, the more I'm convinced that
coordinating state changes between logical processors isn't necessary
and actually is responsible for the hangs people have been seeing.

So here is a diff that does away with it all.  I've tested it on a few
laptops here, but it could use testing on a somewhat wider range of
machines.  I'm especially interested in seeing this tested on a dual
socket machine with apmd -A.


Index: i386/i386/mp_setperf.c
===
RCS file: /cvs/src/sys/arch/i386/i386/mp_setperf.c,v
retrieving revision 1.5
diff -u -p -r1.5 mp_setperf.c
--- i386/i386/mp_setperf.c  29 Jun 2014 01:01:20 -  1.5
+++ i386/i386/mp_setperf.c  8 Sep 2014 20:43:50 -
@@ -17,13 +17,10 @@
 
 #include sys/param.h
 #include sys/systm.h
-#include sys/proc.h
 #include sys/sysctl.h
 #include sys/mutex.h
 
 #include machine/cpu.h
-#include machine/cpufunc.h
-
 #include machine/intr.h
 
 struct mutex setperf_mp_mutex = MUTEX_INITIALIZER(IPL_HIGH);
@@ -31,14 +28,7 @@ struct mutex setperf_mp_mutex = MUTEX_IN
 /* underlying setperf mechanism e.g. k8_powernow_setperf() */
 void (*ul_setperf)(int);
 
-#define MP_SETPERF_STEADY  0   /* steady state - normal operation */
-#define MP_SETPERF_INTRANSIT   1   /* in transition */
-#define MP_SETPERF_PROCEED 2   /* proceed with transition */
-#define MP_SETPERF_FINISH  3   /* return from IPI */
-
-
 /* protected by setperf_mp_mutex */
-volatile int mp_setperf_state = MP_SETPERF_STEADY;
 volatile int mp_perflevel;
 
 void mp_setperf(int);
@@ -46,102 +36,27 @@ void mp_setperf(int);
 void
 mp_setperf(int level)
 {
-   CPU_INFO_ITERATOR cii;
-   struct cpu_info *ci;
-   int notready, s;
-
-   if (mp_setperf_state == MP_SETPERF_STEADY) {
-   mtx_enter(setperf_mp_mutex);
-   disable_intr();
-
-   mp_perflevel = level;
-
-   curcpu()-ci_setperf_state = CI_SETPERF_INTRANSIT;
-   /* ask all other processors to drop what they are doing */
-   CPU_INFO_FOREACH(cii, ci) {
-   if (ci-ci_setperf_state != CI_SETPERF_INTRANSIT) {
-   ci-ci_setperf_state =
-   CI_SETPERF_SHOULDSTOP;
-   i386_send_ipi(ci, I386_IPI_SETPERF);
-   }
-   }
-
-
-   /* Loop until all processors report ready */
-   do {
-   CPU_INFO_FOREACH(cii, ci) {
-   if ((notready = (ci-ci_setperf_state
-   != CI_SETPERF_INTRANSIT)))
-   break;
-   }
-   } while (notready);
-
-   mp_setperf_state = MP_SETPERF_PROCEED; /* release the hounds */
-
-   s = splipi();
-
-   ul_setperf(mp_perflevel);
-
-   splx(s);
-
-   curcpu()-ci_setperf_state = CI_SETPERF_DONE;
-   /* Loop until all processors report done */
-   do {
-   CPU_INFO_FOREACH(cii, ci) {
-   if ((notready = (ci-ci_setperf_state
-   != CI_SETPERF_DONE)))
-   break;
-   }
-   } while (notready);
-
-   mp_setperf_state = MP_SETPERF_FINISH;
-   /* delay a little for potential straglers */
-   DELAY(2);
-   curcpu()-ci_setperf_state = CI_SETPERF_READY;
-   mp_setperf_state = MP_SETPERF_STEADY; /* restore normallity */
-   enable_intr();
-   mtx_leave(setperf_mp_mutex);
-   }
+   mtx_enter(setperf_mp_mutex);
+   mp_perflevel = level;
 
+   ul_setperf(mp_perflevel);
+   i386_broadcast_ipi(I386_IPI_SETPERF);
+   mtx_leave(setperf_mp_mutex);
 }
 
 void
 i386_setperf_ipi(struct cpu_info *ci)
 {
-
-   disable_intr();
-
-   if (ci-ci_setperf_state == CI_SETPERF_SHOULDSTOP)
-   ci-ci_setperf_state = CI_SETPERF_INTRANSIT;
-
-   while (mp_setperf_state != MP_SETPERF_PROCEED)
-   ;
-
ul_setperf(mp_perflevel);
-
-   ci-ci_setperf_state = CI_SETPERF_DONE;
-
-   while (mp_setperf_state != MP_SETPERF_FINISH)
-   ;
-   ci-ci_setperf_state = CI_SETPERF_READY;
-
-   enable_intr();
 }
 
 void
-mp_setperf_init()
+mp_setperf_init(void)
 {
-   CPU_INFO_ITERATOR cii;
-   struct cpu_info *ci;
-
if (!cpu_setperf)
return;
-   ul_setperf = cpu_setperf;
 
+   ul_setperf = cpu_setperf;
cpu_setperf = mp_setperf;
-
-   CPU_INFO_FOREACH(cii, ci) {
-   ci-ci_setperf_state = CI_SETPERF_READY;
-   }
mtx_init(setperf_mp_mutex, IPL_HIGH);
 }
Index: i386/include/cpu.h

Re: PATCH: fix iwn(4) scan hangs

2014-09-09 Thread Mark Kettenis
 Date: Tue, 9 Sep 2014 17:29:35 +0200
 From: Fabian Raetz fabian.ra...@gmail.com
 
 On Tue, Sep 09, 2014 at 12:38:04PM +0200, Fabian Raetz wrote:
  Hi,
  
  below is a patch for iwn(4) which hopefully fixes a problem where iwn(4)
  does not return from a scan, if the interface is up. 
 
 here's an updated version which does not 
 set hdr-max_svc / hdr-pause_svc.
 
 Cristoph Zimmermann noticed that scan requests return no APs on his
 device (thanks for testing).
 iwn0 at pci2 dev 0 function 0 Intel WiFi Link 5100 rev 0x00: msi, MIMO 
 1T2R, MoW, address 00:21:6b:a3:70:7a
 
 As Piotr and Mike tested the patch from Piotr which does not set these
 values either and it still works on my card, this should be the way to go
 for now.

Looks (almost) ok to me; there are some spaces vs. tabs issues with
some of the added #defines.  Tested on:

iwn0 at pci2 dev 0 function 0 Intel Centrino Advanced-N 6205 rev 0x34

So with some minor fixes: ok kettenis@


 Index: if_iwn.c
 ===
 RCS file: /cvs/src/sys/dev/pci/if_iwn.c,v
 retrieving revision 1.133
 diff -u -p -r1.133 if_iwn.c
 --- if_iwn.c  22 Jul 2014 13:12:11 -  1.133
 +++ if_iwn.c  9 Sep 2014 14:57:34 -
 @@ -220,6 +220,9 @@ int   iwn_send_btcoex(struct iwn_softc *)
  int  iwn_send_advanced_btcoex(struct iwn_softc *);
  int  iwn5000_runtime_calib(struct iwn_softc *);
  int  iwn_config(struct iwn_softc *);
 +uint16_t iwn_get_active_dwell_time(struct iwn_softc *, uint16_t, 
 uint8_t);
 +uint16_t iwn_limit_dwell(struct iwn_softc *, uint16_t);
 +uint16_t iwn_get_passive_dwell_time(struct iwn_softc *, uint16_t);
  int  iwn_scan(struct iwn_softc *, uint16_t);
  int  iwn_auth(struct iwn_softc *);
  int  iwn_run(struct iwn_softc *);
 @@ -4424,6 +4427,66 @@ iwn_config(struct iwn_softc *sc)
   return 0;
  }
  
 +uint16_t
 +iwn_get_active_dwell_time(struct iwn_softc *sc,
 +uint16_t flags, uint8_t n_probes)
 +{
 + /* No channel? Default to 2GHz settings */
 + if (flags  IEEE80211_CHAN_2GHZ) {
 + return (IWN_ACTIVE_DWELL_TIME_2GHZ +
 + IWN_ACTIVE_DWELL_FACTOR_2GHZ * (n_probes + 1));
 + }
 +
 + /* 5GHz dwell time */
 + return (IWN_ACTIVE_DWELL_TIME_5GHZ +
 + IWN_ACTIVE_DWELL_FACTOR_5GHZ * (n_probes + 1));
 +}
 +
 +/*
 + * Limit the total dwell time to 85% of the beacon interval.
 + *
 + * Returns the dwell time in milliseconds.
 + */
 +uint16_t
 +iwn_limit_dwell(struct iwn_softc *sc, uint16_t dwell_time)
 +{
 + struct ieee80211com *ic = sc-sc_ic;
 + struct ieee80211_node *ni = ic-ic_bss;
 + int bintval = 0;
 +
 + /* bintval is in TU (1.024mS) */
 + if (ni != NULL)
 + bintval = ni-ni_intval;
 +
 + /*
 +  * If it's non-zero, we should calculate the minimum of
 +  * it and the DWELL_BASE.
 +  *
 +  * XXX Yes, the math should take into account that bintval
 +  * is 1.024mS, not 1mS..
 +  */
 + if (bintval  0) {
 + return (MIN(IWN_PASSIVE_DWELL_BASE, ((bintval * 85) / 100)));
 + }
 +
 + /* No association context? Default */
 + return (IWN_PASSIVE_DWELL_BASE);
 +}
 +
 +uint16_t
 +iwn_get_passive_dwell_time(struct iwn_softc *sc, uint16_t flags)
 +{
 + uint16_t passive;
 + if (flags  IEEE80211_CHAN_2GHZ) {
 + passive = IWN_PASSIVE_DWELL_BASE + IWN_PASSIVE_DWELL_TIME_2GHZ;
 + } else {
 + passive = IWN_PASSIVE_DWELL_BASE + IWN_PASSIVE_DWELL_TIME_5GHZ;
 + }
 +
 + /* Clamp to the beacon interval if we're associated */
 + return (iwn_limit_dwell(sc, passive));
 +}
 +
  int
  iwn_scan(struct iwn_softc *sc, uint16_t flags)
  {
 @@ -4436,9 +4499,9 @@ iwn_scan(struct iwn_softc *sc, uint16_t 
   struct ieee80211_rateset *rs;
   struct ieee80211_channel *c;
   uint8_t *buf, *frm;
 - uint16_t rxchain;
 + uint16_t rxchain, dwell_active, dwell_passive;
   uint8_t txant;
 - int buflen, error;
 + int buflen, error, is_active;
  
   buf = malloc(IWN_SCAN_MAXSZ, M_DEVBUF, M_NOWAIT | M_ZERO);
   if (buf == NULL) {
 @@ -4474,7 +4537,6 @@ iwn_scan(struct iwn_softc *sc, uint16_t 
   tx-lifetime = htole32(IWN_LIFETIME_INFINITE);
  
   if (flags  IEEE80211_CHAN_5GHZ) {
 - hdr-crc_threshold = 0x;
   /* Send probe requests at 6Mbps. */
   tx-plcp = iwn_rates[IWN_RIDX_OFDM6].plcp;
   rs = ic-ic_sup_rates[IEEE80211_MODE_11A];
 @@ -4488,12 +4550,23 @@ iwn_scan(struct iwn_softc *sc, uint16_t 
   /* Use the first valid TX antenna. */
   txant = IWN_LSB(sc-txchainmask);
   tx-rflags |= IWN_RFLAG_ANT(txant);
 + 
 + /*
 +  * Only do active scanning if we're announcing a probe request
 +  * for a given SSID (or more, if we ever add it to the driver.)
 +  */
 + is_active = 0;
  
 + /*
 +  * If we're scanning for a specific SSID, add it to the 

Re: apmd hangs

2014-09-09 Thread Mark Kettenis
 Date: Tue, 9 Sep 2014 19:27:42 +0200
 From: Ingo Schwarze schwa...@usta.de
 
 Hi Mark,
 
 Mark Kettenis wrote on Mon, Sep 08, 2014 at 11:35:36PM +0200:
 
  The more code & documentation I read, the more I'm convinced that
  coordinating state changes between logical processors isn't necessary
  and actually is responsible for the hangs people have been seeing.
  
  So here is a diff that does away with it all.  I've tested it on a few
  laptops here, but it could use testing on a somewhat wider range of
  machines.  I'm especially interested in seeing this tested on a dual
  socket machine with apmd -A.
 
 i'm sorry to say it makes no difference for me (i'm not opposed to the
 diff, though).
 
 On my laptop, building ports works fine, running firefox works fine,
 but whenever i surf the web with firefox while building ports,
 the machine locks up hard.  Sometimes, the lockup already happens
 when merely starting firefox while building ports.  Often, it
 happens not when requesting a new URI, but when merely scrolling
 within the page in firefox.
 
 After the lockup, CapsLk and NmLk still toggle the respective LEDs,
 Fn-PgUp still switches on and off the torch, but nothing else has
 any effect, not even Ctrl-Alt-Esc, Ctrl-Alt-Delete, Ctrl-Alt-Backspace
 or Ctrl-Alt-F1.
 
 Unfortunately, i cannot break into ddb because i don't have a
 docking station, hence no serial console, and when going to the
 PC virtual console (Ctrl-Alt-F1), setting export DISPLAY=:0,
 and starting firefox from the console, i was unable to get any
 lockup.  Apparently, it only happens when X (or whatever) is
 actually painting something onto the screen.
 
 Whether i run with the defaults or with apm -A doesn't appear to
 make a difference.

Not sure what you mean with defaults, but if the crashes happen even
in manual performance adjustment mode, this diff certainly won't
magically fix things.



Re: sparc64: fledgling QEMU support

2014-09-09 Thread Mark Kettenis
 Date: Tue, 09 Sep 2014 19:20:09 +0100
 From: Mark Cave-Ayland mark.cave-ayl...@ilande.co.uk
 
 Hi all,
 
 Following up from my posts at the beginning of the summer, I'm pleased 
 to announce that as of today, qemu-system-sparc64 built from QEMU git 
 master will successfully install OpenBSD from an .iso and boot back into 
 it in serial mode with its default sun4u emulation:

...

 There are still some issues with the device tree to work out; in 
 particular NVRAM and networking (I'd guess that the OpenBSD sparc64 
 kernel doesn't contain the Realtek device driver so at some point I'll 
 need to create a virtual hme device) but it's good enough to 
 install/boot an OS on different hardware for testing - what could be 
 more fun than that?

Sweet.

The RealTek 8129 should be supported by the rl(4) driver, and is
AFAICT included in the RAMDISK kernel.  Not sure why it doesn't
attach.  If it is easy to hook up QEMU's e1000 hardware emulation to
the emulated sparc64 hardware, that should be supported as well on the
OpenBSD side.

OpenBSD expects the device tree node for the PS/2 keyboard to be named
8042.  That's how it is named on the Ultra AXi boards.

The NVRAM is supposed to be described by a node named eeprom under
ebus.  Proper emulation of this device will get rid of the

  unix-gettod:interpret: exception -13 caught
  interpret h# 01c099fc unix-gettod failed with error ffed
  WARNING: bad date in battery clock -- CHECK AND RESET THE DATE!

spam.

Cheers,

Mark







Re: sparc64: fledgling QEMU support

2014-09-09 Thread Mark Kettenis
 From: Stefan Fritsch s...@sfritsch.de
 Date: Tue, 09 Sep 2014 22:41:53 +0200
 
 On Tuesday 09 September 2014 21:27:37, Mark Cave-Ayland wrote:
   Could the PCI virtio stuff be adapted to non-x86 architectures?
  
  QEMU already has a virtio PCI device that can be plugged into 
  qemu-system-sparc64 (see Artyom's blog at 
  http://tyom.blogspot.co.uk/2013/03/debiansparc64-wheezy-under-qemu-h
  ow-to.html  for an example of how to do this with Linux).
  
  This could be an amusing project; in theory it would be possible to
  work  on an x86 laptop to test/debug big-endian virtio support with
  the help of QEMU's virtual hardware.  You can do this by plugging
  in a standard virtual cdrom/hd along with an additional virtio
  hd/nic, booting from the standard devices and then testing the
  drivers accessing the extra devices as required.
 
 From the openbsd side, virtio should work with sparc. But since nobody 
 has tested it on big endian so far, there will be bugs. And it needs 
 to be enabled in the config, of course.
 
 If you look at generic PCI network adapters, I would recommend trying 
 e1000 if possible. Last time I tried it (on x86), qemu's rtl8139 
 emulation did not work with openbsd's driver, and I think there were 
 some problems with the ne2k emulation, too. Or maybe ne2k just had 
 terrible performance.

Sounds like faithful emulation to me ;).



Re: possible bug in asinhl on sparc64 (libm)

2014-09-10 Thread Mark Kettenis
 Date: Sat, 6 Sep 2014 14:15:32 -0400
 From: Daniel Dickman didick...@gmail.com
 
 according to the numpy developers asinhl on sparc64 might be buggy. I
 haven't worked out a test case yet but just reporting in case anyone else
 wants to take a look as well.
 
 bug report:
 https://github.com/numpy/numpy/issues/5026#issuecomment-54711361

Our asinhl(3) implementation is actually pretty much identical to the
one in glibc.  The problem is that our sqrtl(3) implementation is
buggy, and this function is used to calculate asinhl(3) in certain
regimes.  Fix forthcoming and with that fix, the testcase in the
bugreport no longer fails.

The fix below is from FreeBSD.  I can't say I completely understand
it, but it makes some sense at least.  It's a bit different from the
FreeBSD commit as they put in some unrelated optimizations and
departed from the existing coding style a bit.

ok?


Index: libc/arch/sparc64/fpu/fpu_sqrt.c
===
RCS file: /cvs/src/lib/libc/arch/sparc64/fpu/fpu_sqrt.c,v
retrieving revision 1.3
diff -u -p -r1.3 fpu_sqrt.c
--- libc/arch/sparc64/fpu/fpu_sqrt.c26 Nov 2013 20:33:07 -  1.3
+++ libc/arch/sparc64/fpu/fpu_sqrt.c10 Sep 2014 19:47:38 -
@@ -372,12 +372,12 @@ __fpu_sqrt(fe)
FPU_SUBCS(d2, x2, t2);
FPU_SUBCS(d1, x1, t1);
FPU_SUBC(d0, x0, t0);
-   ODD_DOUBLE;
if ((int)d0 = 0) {
-   x0 = d0, x1 = d1, x2 = d2;
+   x0 = d0, x1 = d1, x2 = d2, x3 = d3;
q |= bit;
y2 |= 1;
}
+   ODD_DOUBLE;
while ((bit = 1) != 0) {
EVEN_DOUBLE;
t3 = y3 | bit;
@@ -386,7 +386,7 @@ __fpu_sqrt(fe)
FPU_SUBCS(d1, x1, t1);
FPU_SUBC(d0, x0, t0);
if ((int)d0 = 0) {
-   x0 = d0, x1 = d1, x2 = d2;
+   x0 = d0, x1 = d1, x2 = d2, x3 = d3;
q |= bit;
y3 |= bit  1;
}



Re: possible bug in asinhl on sparc64 (libm)

2014-09-10 Thread Mark Kettenis
 Date: Wed, 10 Sep 2014 21:52:30 +0200 (CEST)
 From: Mark Kettenis mark.kette...@xs4all.nl
 
  Date: Sat, 6 Sep 2014 14:15:32 -0400
  From: Daniel Dickman didick...@gmail.com
  
  according to the numpy developers asinhl on sparc64 might be buggy. I
  haven't worked out a test case yet but just reporting in case anyone else
  wants to take a look as well.
  
  bug report:
  https://github.com/numpy/numpy/issues/5026#issuecomment-54711361
 
 Our asinhl(3) implementation is actually pretty much identical to the
 one in glibc.  The problem is that our sqrtl(3) implementation is
 buggy, and this function is used to calculate asinhl(3) in certain
 regimes.  Fix forthcoming and with that fix, the testcase in the
 bugreport no longer fails.
 
 The fix below is from FreeBSD.  I can't say I completely understand
 it, but it makes some sense at least.  It's a bit different from the
 FreeBSD commit as they put in some unrelated optimizations and
 departed from the existing coding style a bit.
 
 ok?

Now the fix for the _Qp_sqrt() soft-float routine is enough to fix
asinhl(3), but only because gcc decides to directly invoke it inside
asinhl(3).  But in other contexts it decides to actually emit a
sqrtl(3) library call, which invokes a totally different
implementation.  And that other implementation, which lives in
libm/src/e_sqrtl.c, is buggy too.

So here is a diff that overrides this file for sparc64 with some code
that simply invokes _Qp_sqrt().

ok?

Index: libm/Makefile
===
RCS file: /cvs/src/lib/libm/Makefile,v
retrieving revision 1.106
diff -u -p -r1.106 Makefile
--- libm/Makefile   18 Mar 2014 22:36:30 -  1.106
+++ libm/Makefile   10 Sep 2014 19:56:16 -
@@ -67,6 +67,7 @@ ARCH_SRCS = e_sqrt.c e_sqrtf.c s_fabsf.c
 .PATH: ${.CURDIR}/arch/sparc
 .elif (${MACHINE_ARCH} == sparc64)
 .PATH: ${.CURDIR}/arch/sparc64
+ARCH_SRCS = e_sqrtl.c
 .elif (${MACHINE_ARCH} == vax)
 .PATH: ${.CURDIR}/arch/vax
 NOIEEE_ARCH = n_argred.S n_infnan.S n_sqrt.S
Index: libm/arch/sparc64/e_sqrtl.c
===
RCS file: libm/arch/sparc64/e_sqrtl.c
diff -N libm/arch/sparc64/e_sqrtl.c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ libm/arch/sparc64/e_sqrtl.c 10 Sep 2014 19:56:16 -
@@ -0,0 +1,29 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2014 Mark Kettenis
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include math.h
+
+extern void _Qp_sqrt(long double *, long double *);
+
+long double
+sqrtl(long double x)
+{
+   long double y;
+
+   _Qp_sqrt(y, x);
+   return y;
+}



Re: mpsafe mpi

2014-09-14 Thread Mark Kettenis
 Date: Tue, 9 Sep 2014 15:10:46 +1000
 From: David Gwynne da...@gwynne.id.au
 
 mpsafe mpi. eyes would be appreciated. tests well to me.
 
 ok?

Tested on 

mpi0 at pci5 dev 0 function 0 Symbios Logic SAS1064E rev 0x02: msi

Couldn't find any problems in the code, so

ok kettenis@

 Index: ic/mpi.c
 ===
 RCS file: /cvs/src/sys/dev/ic/mpi.c,v
 retrieving revision 1.195
 diff -u -p -r1.195 mpi.c
 --- ic/mpi.c  3 Sep 2014 00:46:04 -   1.195
 +++ ic/mpi.c  9 Sep 2014 05:09:37 -
 @@ -1311,6 +1311,8 @@ mpi_scsi_cmd(struct scsi_xfer *xs)
  
   DNPRINTF(MPI_D_CMD, %s: mpi_scsi_cmd\n, DEVNAME(sc));
  
 + KERNEL_UNLOCK();
 +
   if (xs-cmdlen  MPI_CDB_LEN) {
   DNPRINTF(MPI_D_CMD, %s: CBD too big %d\n,
   DEVNAME(sc), xs-cmdlen);
 @@ -1319,8 +1321,7 @@ mpi_scsi_cmd(struct scsi_xfer *xs)
   xs-sense.flags = SKEY_ILLEGAL_REQUEST;
   xs-sense.add_sense_code = 0x20;
   xs-error = XS_SENSE;
 - scsi_done(xs);
 - return;
 + goto done;
   }
  
   ccb = xs-io;
 @@ -1372,23 +1373,25 @@ mpi_scsi_cmd(struct scsi_xfer *xs)
   htolem32(io-sense_buf_low_addr, ccb-ccb_cmd_dva +
   ((u_int8_t *)mcb-mcb_sense - (u_int8_t *)mcb));
  
 - if (mpi_load_xs(ccb) != 0) {
 - xs-error = XS_DRIVER_STUFFUP;
 - scsi_done(xs);
 - return;
 - }
 + if (mpi_load_xs(ccb) != 0)
 + goto stuffup;
  
   timeout_set(xs-stimeout, mpi_timeout_xs, ccb);
  
   if (xs-flags  SCSI_POLL) {
 - if (mpi_poll(sc, ccb, xs-timeout) != 0) {
 - xs-error = XS_DRIVER_STUFFUP;
 - scsi_done(xs);
 - }
 - return;
 - }
 + if (mpi_poll(sc, ccb, xs-timeout) != 0)
 + goto stuffup;
 + } else
 + mpi_start(sc, ccb);
  
 - mpi_start(sc, ccb);
 + KERNEL_LOCK();
 + return;
 +
 +stuffup:
 + xs-error = XS_DRIVER_STUFFUP;
 +done:
 + KERNEL_LOCK();
 + scsi_done(xs);
  }
  
  void
 @@ -1415,7 +1418,9 @@ mpi_scsi_cmd_done(struct mpi_ccb *ccb)
   if (ccb-ccb_rcb == NULL) {
   /* no scsi error, we're ok so drop out early */
   xs-status = SCSI_OK;
 + KERNEL_LOCK();
   scsi_done(xs);
 + KERNEL_UNLOCK();
   return;
   }
  
 @@ -2370,14 +2375,18 @@ mpi_evt_sas(struct mpi_softc *sc, struct
   switch (ch-reason) {
   case MPI_EVT_SASCH_REASON_ADDED:
   case MPI_EVT_SASCH_REASON_NO_PERSIST_ADDED:
 + KERNEL_LOCK();
   if (scsi_req_probe(sc-sc_scsibus, ch-target, -1) != 0) {
   printf(%s: unable to request attach of %d\n,
   DEVNAME(sc), ch-target);
   }
 + KERNEL_UNLOCK();
   break;
  
   case MPI_EVT_SASCH_REASON_NOT_RESPONDING:
 + KERNEL_LOCK();
   scsi_activate(sc-sc_scsibus, ch-target, -1, DVACT_DEACTIVATE);
 + KERNEL_UNLOCK();
  
   mtx_enter(sc-sc_evt_scan_mtx);
   SIMPLEQ_INSERT_TAIL(sc-sc_evt_scan_queue, rcb, rcb_link);
 @@ -2451,11 +2460,13 @@ mpi_evt_sas_detach_done(struct mpi_ccb *
   struct mpi_softc*sc = ccb-ccb_sc;
   struct mpi_msg_scsi_task_reply  *r = ccb-ccb_rcb-rcb_reply;
  
 + KERNEL_LOCK();
   if (scsi_req_detach(sc-sc_scsibus, r-target_id, -1,
   DETACH_FORCE) != 0) {
   printf(%s: unable to request detach of %d\n,
   DEVNAME(sc), r-target_id);
   }
 + KERNEL_UNLOCK();
  
   mpi_push_reply(sc, ccb-ccb_rcb);
   scsi_io_put(sc-sc_iopool, ccb);
 Index: pci/mpi_pci.c
 ===
 RCS file: /cvs/src/sys/dev/pci/mpi_pci.c,v
 retrieving revision 1.24
 diff -u -p -r1.24 mpi_pci.c
 --- pci/mpi_pci.c 9 Jun 2011 04:55:44 -   1.24
 +++ pci/mpi_pci.c 9 Sep 2014 05:09:37 -
 @@ -142,7 +142,7 @@ mpi_pci_attach(struct device *parent, st
   goto unmap;
   }
   intrstr = pci_intr_string(psc-psc_pc, ih);
 - psc-psc_ih = pci_intr_establish(psc-psc_pc, ih, IPL_BIO,
 + psc-psc_ih = pci_intr_establish(psc-psc_pc, ih, IPL_BIO | IPL_MPSAFE,
   mpi_intr, sc, sc-sc_dev.dv_xname);
   if (psc-psc_ih == NULL) {
   printf(: unable to map interrupt%s%s\n,
 



Re: ATI SB400 PCI bridge fallback to substractive decode

2014-09-14 Thread Mark Kettenis
 Date: Sun, 14 Sep 2014 23:57:09 +0200
 From: Thierry Deval thierry+openbsd.t...@deval.be
 
 Hi,
 
 When I put a CF to PCCard adapter (not CardBus) in my laptop to work on a CF 
 boot image, I was surprised by this kernel message :
 
 ** wdc2 at pcmcia0 function 0 SanDisk, SDP, 5/3 0.6: can't handle card info
 
 And the card was not working at all, as weren't any other PCCard I tried 
 afterwards.
 
 After digging and enabling as much debugging as I could, I found that the 
 cardbus bridge (TI PCI7xx1 CardBus) couldn't allocate any IO or mem range 
 for the cards.
 
 Digging deeper to understand how the allocation should work, I noticed 
 a comment in dev/pci/ppb.c talking about handling substractive (or is it 
 really 'subtractive' as in the comment ? ) decode scheme for more than the 
 Intel 82801 PCI bridge.
 So, as a test, I included the ATI SB400 PCI bridge (behind which the TI 
 PCI7xx1 CardBus bridge was lying) to the substractive decode treatment. 
 I was rewarded by a working CF card.
 
 ** ppb2 at pci0 dev 20 function 4 ATI SB400 PCI rev 0x00
 ** pci3 at ppb2 bus 6
 ** cbb0 at pci3 dev 9 function 0 TI PCI7XX1 CardBus rev 0x00: apic 1 int 23
 ** cardslot0 at cbb0 slot 0 flags 0
 ** cardbus0 at cardslot0: bus 7 device 0 cacheline 0x8, lattimer 0x20
 ** pcmcia0 at cardslot0
 ** wdc2 at pcmcia0 function 0 SanDisk, SDP, 5/3 0.6 port 0xa000/16
 ** wd1 at wdc2 channel 0 drive 0: SanDisk SDCFB-64
 ** wd1: 1-sector PIO, LBA, 61MB, 125440 sectors
 ** wd1(wdc2:0:0): using BIOS timings
 ** wd1 detached
 ** wdc2 detached
 
 I don't know if anybody else has seen the same problem but I would be glad
 to have a confirmation of the fix.
 
 Here's the diff:
 
 Index: ppb.c
 ===
 RCS file: /cvs/src/sys/dev/pci/ppb.c,v
 retrieving revision 1.58
 diff -u -p -u -p -r1.58 ppb.c
 --- ppb.c 12 Jul 2014 18:48:52 -  1.58
 +++ ppb.c 14 Sep 2014 17:00:40 -
 @@ -286,9 +286,11 @@ ppbattach(struct device *parent, struct
* XXX We probably should handle subtractive decode bridges
* in general.
*/
 - if (PCI_VENDOR(pa-pa_id) == PCI_VENDOR_INTEL 
 + if ((PCI_VENDOR(pa-pa_id) == PCI_VENDOR_INTEL 
   (PCI_PRODUCT(pa-pa_id) == PCI_PRODUCT_INTEL_82801BA_HPB ||
 - PCI_PRODUCT(pa-pa_id) == PCI_PRODUCT_INTEL_82801BAM_HPB)) {
 +  PCI_PRODUCT(pa-pa_id) == PCI_PRODUCT_INTEL_82801BAM_HPB)) ||
 + (PCI_VENDOR(pa-pa_id) == PCI_VENDOR_ATI 
 +  PCI_PRODUCT(pa-pa_id) == PCI_PRODUCT_ATI_SB400_PCI)){
   if (sc-sc_ioex == NULL)
   sc-sc_ioex = pa-pa_ioex;
   if (sc-sc_memex == NULL)
 
 I hope this could go in, at least as a first step.
 I plan on looking further about what this substractive decode means, and 
 if there could be a generic way of enabling it where supported...

Can you send pcidump -vxxx output for that machine?



Re: ATI SB400 PCI bridge fallback to substractive decode

2014-09-15 Thread Mark Kettenis
 Date: Mon, 15 Sep 2014 09:37:56 +0200
 From: Thierry Deval thierry+openbsd.t...@deval.be
 
 On Mon, Sep 15, 2014 at 12:25:19AM +0200, Mark Kettenis wrote:
  Date: Sun, 14 Sep 2014 23:57:09 +0200
  From: Thierry Deval thierry+openbsd.t...@deval.be
 
  Hi,
 
  When I put a CF to PCCard adapter (not CardBus) in my laptop to work on a 
  CF
  boot image, I was surprised by this kernel message :
 
  ** wdc2 at pcmcia0 function 0 SanDisk, SDP, 5/3 0.6: can't handle card 
  info
 
  And the card was not working at all, as weren't any other PCCard I tried
  afterwards.
 
  After digging and enabling as much debugging as I could, I found that the
  cardbus bridge (TI PCI7xx1 CardBus) couldn't allocate any IO or mem range
  for the cards.
 
  Digging deeper to understand how the allocation should work, I noticed
  a comment in dev/pci/ppb.c talking about handling substractive (or is it
  really 'subtractive' as in the comment ? ) decode scheme for more than the
  Intel 82801 PCI bridge.
  So, as a test, I included the ATI SB400 PCI bridge (behind which the TI
  PCI7xx1 CardBus bridge was lying) to the substractive decode treatment.
  I was rewarded by a working CF card.
 
  ** ppb2 at pci0 dev 20 function 4 ATI SB400 PCI rev 0x00
  ** pci3 at ppb2 bus 6
  ** cbb0 at pci3 dev 9 function 0 TI PCI7XX1 CardBus rev 0x00: apic 1 int 
  23
  ** cardslot0 at cbb0 slot 0 flags 0
  ** cardbus0 at cardslot0: bus 7 device 0 cacheline 0x8, lattimer 0x20
  ** pcmcia0 at cardslot0
  ** wdc2 at pcmcia0 function 0 SanDisk, SDP, 5/3 0.6 port 0xa000/16
  ** wd1 at wdc2 channel 0 drive 0: SanDisk SDCFB-64
  ** wd1: 1-sector PIO, LBA, 61MB, 125440 sectors
  ** wd1(wdc2:0:0): using BIOS timings
  ** wd1 detached
  ** wdc2 detached
 
  I don't know if anybody else has seen the same problem but I would be glad
  to have a confirmation of the fix.
 
  Here's the diff:
 
  Index: ppb.c
  ===
  RCS file: /cvs/src/sys/dev/pci/ppb.c,v
  retrieving revision 1.58
  diff -u -p -u -p -r1.58 ppb.c
  --- ppb.c  12 Jul 2014 18:48:52 -  1.58
  +++ ppb.c  14 Sep 2014 17:00:40 -
  @@ -286,9 +286,11 @@ ppbattach(struct device *parent, struct
  * XXX We probably should handle subtractive decode bridges
  * in general.
  */
  -  if (PCI_VENDOR(pa-pa_id) == PCI_VENDOR_INTEL 
  +  if ((PCI_VENDOR(pa-pa_id) == PCI_VENDOR_INTEL 
 (PCI_PRODUCT(pa-pa_id) == PCI_PRODUCT_INTEL_82801BA_HPB ||
  -  PCI_PRODUCT(pa-pa_id) == PCI_PRODUCT_INTEL_82801BAM_HPB)) {
  +   PCI_PRODUCT(pa-pa_id) == PCI_PRODUCT_INTEL_82801BAM_HPB)) ||
  +  (PCI_VENDOR(pa-pa_id) == PCI_VENDOR_ATI 
  +   PCI_PRODUCT(pa-pa_id) == PCI_PRODUCT_ATI_SB400_PCI)){
 if (sc-sc_ioex == NULL)
 sc-sc_ioex = pa-pa_ioex;
 if (sc-sc_memex == NULL)
 
  I hope this could go in, at least as a first step.
  I plan on looking further about what this substractive decode means, and
  if there could be a generic way of enabling it where supported...
 
 Can you send pcidump -vxxx output for that machine?
 
 Sure Mark, here it is.

Thanks.  So your PCI bridge properly advertises itself as subtractive
decode.  Can you try the diff below?

Thanks,

Mark

Index: ppbreg.h
===
RCS file: /cvs/src/sys/dev/pci/ppbreg.h,v
retrieving revision 1.4
diff -u -p -r1.4 ppbreg.h
--- ppbreg.h19 Nov 2009 20:43:32 -  1.4
+++ ppbreg.h15 Sep 2014 09:14:53 -
@@ -40,6 +40,11 @@
  */
 
 /*
+ * PCI Programming Interface register.
+ */
+#definePPB_INTERFACE_SUBTRACTIVE   0x01
+
+/*
  * Register offsets
  */
 #definePPB_REG_BASE0   0x10/* Base Addr Reg. 0 */
Index: ppb.c
===
RCS file: /cvs/src/sys/dev/pci/ppb.c,v
retrieving revision 1.58
diff -u -p -r1.58 ppb.c
--- ppb.c   12 Jul 2014 18:48:52 -  1.58
+++ ppb.c   15 Sep 2014 09:14:53 -
@@ -146,6 +146,7 @@ ppbattach(struct device *parent, struct 
struct pci_attach_args *pa = aux;
pci_chipset_tag_t pc = pa-pa_pc;
struct pcibus_attach_args pba;
+   pci_interface_t interface;
pci_intr_handle_t ih;
pcireg_t busdata, reg, blr;
char *name;
@@ -206,9 +207,18 @@ ppbattach(struct device *parent, struct 
 
printf(\n);
 
-   if (PCI_VENDOR(pa-pa_id) != PCI_VENDOR_INTEL ||
-   (PCI_PRODUCT(pa-pa_id) != PCI_PRODUCT_INTEL_82801BA_HPB 
-   PCI_PRODUCT(pa-pa_id) != PCI_PRODUCT_INTEL_82801BAM_HPB))
+   interface = PCI_INTERFACE(pa-pa_class);
+
+   /*
+* The Intel 82801BAM Hub-to-PCI can decode subtractively but
+* doesn't advertise itself as such.
+*/
+   if (PCI_VENDOR(pa-pa_id) == PCI_VENDOR_INTEL 
+   (PCI_PRODUCT(pa-pa_id) == PCI_PRODUCT_INTEL_82801BA_HPB

Re: make top combine cpu lines by default if you have a lot of cpus

2014-09-16 Thread Mark Kettenis
 Date: Tue, 16 Sep 2014 21:51:00 +1000
 From: David Gwynne da...@gwynne.id.au
 
 if you have more than 8 cpus, combine the cpu lines by default.
 
 ok?

8 seems a reasonable number

ok kettenis@

 Index: machine.c
 ===
 RCS file: /cvs/src/usr.bin/top/machine.c,v
 retrieving revision 1.78
 diff -u -p -r1.78 machine.c
 --- machine.c 4 Jul 2014 05:58:31 -   1.78
 +++ machine.c 16 Sep 2014 11:46:41 -
 @@ -141,14 +141,26 @@ int ncpu;
  unsigned int maxslp;
  
  int
 -machine_init(struct statics *statics)
 +getncpu(void)
  {
 + int mib[] = { CTL_HW, HW_NCPU };
 + int ncpu;
   size_t size = sizeof(ncpu);
 - int mib[2], pagesize, cpu;
  
 - mib[0] = CTL_HW;
 - mib[1] = HW_NCPU;
 - if (sysctl(mib, 2, ncpu, size, NULL, 0) == -1)
 + if (sysctl(mib, sizeof(mib) / sizeof(mib[0]),
 + ncpu, size, NULL, 0) == -1)
 + return (-1);
 +
 + return (ncpu);
 +}
 +
 +int
 +machine_init(struct statics *statics)
 +{
 + int pagesize, cpu;
 +
 + ncpu = getncpu();
 + if (ncpu == -1)
   return (-1);
   cpu_states = calloc(ncpu, CPUSTATES * sizeof(int64_t));
   if (cpu_states == NULL)
 Index: machine.h
 ===
 RCS file: /cvs/src/usr.bin/top/machine.h,v
 retrieving revision 1.17
 diff -u -p -r1.17 machine.h
 --- machine.h 5 Jun 2012 18:52:53 -   1.17
 +++ machine.h 16 Sep 2014 11:46:41 -
 @@ -93,3 +93,5 @@ extern char*format_next_process(cadd
  extern uid_tproc_owner(pid_t);
  
  extern struct kinfo_proc *getprocs(int, int, int *);
 +
 +int  getncpu(void);
 Index: top.c
 ===
 RCS file: /cvs/src/usr.bin/top/top.c,v
 retrieving revision 1.81
 diff -u -p -r1.81 top.c
 --- top.c 7 Apr 2014 15:49:22 -   1.81
 +++ top.c 16 Sep 2014 11:46:41 -
 @@ -250,6 +250,13 @@ parseargs(int ac, char **av)
   }
   }
  
 + i = getncpu();
 + if (i == -1)
 + err(1, NULL);
 +
 + if (i  8)
 + combine_cpus = 1;
 +
   /* get count of top processes to display (if any) */
   if (optind  ac) {
   if ((topn = atoiwi(av[optind])) == Invalid) {
 
 



Re: Fix for POSIX conformance issue

2014-09-17 Thread Mark Kettenis
 From: j...@wxcvbn.org (=?utf-8?Q?J=C3=A9r=C3=A9mie_Courr=C3=A8ges-Anglas?=)
 Date: Wed, 17 Sep 2014 13:51:37 +0200
 
 Todd C. Miller todd.mil...@courtesan.com writes:
 
  I have no objection to this but I don't think the System-V setpgrp()
  API belongs in compat-43.  We can just move it to gen/setpgrp.c.
 
  Like Ted says, we should ready the source tree first by using
  setpgid().  However, all the uses of setpgrp() in the tree are the
  equivalent of:
 
  setpgrp(0, getpid());
 
  which could be replaced more simply by:
 
  setpgid(0, 0);
 
 I agree that the source tree should use setpgid().
 
 However I don't think that changing our setpgrp definition would bring
 much (any?) benefit.  The mismatch here between SysV and BSD has been
 known for a long time, and I bet that a bunch of stuff in ports will use
 the BSD idiom inside simple #ifdef BSD checks.  I have no idea right now
 of the number of ports that would be affected, but the efforts spent by
 porters on this issue should be considered.

Note that the SysV version of setpgrp is marked as an XSI extension in
the combined POSIX and X/Open specification.  As such it isn't
actually part of POSIX and isn't needed for POSIX compliance.



physical disk support for mfii(4)

2014-09-20 Thread Mark Kettenis
The diff below adds support for physical disks to mfii(4).  Just
like with mfi(4) you can configure this hardware (or at least some
models) to expose disks that have not been assigned to a logical volume
to the host.  This diff makes those disks accessible from OpenBSD.

I would appreciate some tests, especially from people who have
configured some logical volumes on their hardware.


Index: mfii.c
===
RCS file: /cvs/src/sys/dev/pci/mfii.c,v
retrieving revision 1.17
diff -u -p -r1.17 mfii.c
--- mfii.c  13 Jul 2014 23:10:23 -  1.17
+++ mfii.c  20 Sep 2014 18:20:30 -
@@ -48,6 +48,7 @@
 #define MFII_REQ_TYPE_LDIO (0x7  1)
 #define MFII_REQ_TYPE_MFA  (0x1  1)
 #define MFII_REQ_TYPE_NO_LOCK  (0x2  1)
+#define MFII_REQ_TYPE_HI_PRI   (0x6  1)
 
 #define MFII_REQ_MFA(_a)   htole64((_a) | MFII_REQ_TYPE_MFA)
 
@@ -59,9 +60,11 @@ struct mfii_request_descr {
u_int16_t   smid;
 
u_int16_t   lmid;
-   u_int16_t   field;
+   u_int16_t   dev_handle;
 } __packed;
 
+#define MFII_RAID_CTX_IO_TYPE_SYSPD(0x1  4)
+
 struct mfii_raid_context {
u_int8_ttype_nseg;
u_int8_t_reserved1;
@@ -105,6 +108,34 @@ struct mfii_sge {
 
 #define MFII_REQUEST_SIZE  256
 
+#define MR_DCMD_LD_MAP_GET_INFO0x0300e101
+
+#define MFII_MAX_ROW   32
+#define MFII_MAX_ARRAY 128
+
+struct mfii_array_map {
+   uint16_tmam_pd[MFII_MAX_ROW];
+} __packed;
+
+struct mfii_dev_handle {
+   uint16_tmdh_cur_handle;
+   uint8_t mdh_valid;
+   uint8_t mdh_reserved;
+   uint16_tmdh_handle[2];
+} __packed;
+
+struct mfii_ld_map {
+   uint32_tmlm_total_size;
+   uint32_tmlm_reserved1[5];
+   uint32_tmlm_num_lds;
+   uint32_tmlm_reserved2;
+   uint8_t mlm_tgtid_to_ld[2 * MFI_MAX_LD];
+   uint8_t mlm_pd_timeout;
+   uint8_t mlm_reserved3[7];
+   struct mfii_array_map   mlm_am[MFII_MAX_ARRAY];
+   struct mfii_dev_handle  mlm_dev_handle[MFI_MAX_PD];
+} __packed;
+
 struct mfii_dmamem {
bus_dmamap_tmdm_map;
bus_dma_segment_t   mdm_seg;
@@ -156,6 +187,19 @@ struct mfii_ccb {
 };
 SIMPLEQ_HEAD(mfii_ccb_list, mfii_ccb);
 
+struct mfii_pd_link {
+   u_int16_t   pd_id;
+   struct mfi_pd_details   pd_info;
+   u_int16_t   pd_handle;
+};
+
+struct mfii_pd_softc {
+   struct scsi_linkpd_link;
+   struct scsibus_softc*pd_scsibus;
+   struct mfii_pd_link *pd_links[MFI_MAX_PD];
+   uint8_t pd_timeout;
+};
+
 struct mfii_softc {
struct device   sc_dev;
 
@@ -189,6 +233,7 @@ struct mfii_softc {
 
struct scsi_linksc_link;
struct scsibus_softc*sc_scsibus;
+   struct mfii_pd_softc*sc_pd;
struct scsi_iopool  sc_iopool;
 
struct mfi_ctrl_infosc_info;
@@ -222,6 +267,15 @@ struct scsi_adapter mfii_switch = {
NULL  /* ioctl */
 };
 
+void   mfii_pd_scsi_cmd(struct scsi_xfer *);
+intmfii_pd_scsi_probe(struct scsi_link *);
+
+struct scsi_adapter mfii_pd_switch = {
+   mfii_pd_scsi_cmd,
+   scsi_minphys,
+   mfii_pd_scsi_probe
+};
+
 #define DEVNAME(_sc)   ((_sc)-sc_dev.dv_xname)
 
 u_int32_t  mfii_read(struct mfii_softc *, bus_size_t);
@@ -239,6 +293,7 @@ voidmfii_scrub_ccb(struct mfii_ccb 
*)
 intmfii_transition_firmware(struct mfii_softc *);
 intmfii_initialise_firmware(struct mfii_softc *);
 intmfii_get_info(struct mfii_softc *);
+intmfii_syspd(struct mfii_softc *);
 
 void   mfii_start(struct mfii_softc *, struct mfii_ccb *);
 void   mfii_done(struct mfii_softc *, struct mfii_ccb *);
@@ -264,6 +319,8 @@ int mfii_scsi_cmd_io(struct mfii_softc
struct scsi_xfer *);
 intmfii_scsi_cmd_cdb(struct mfii_softc *,
struct scsi_xfer *);
+intmfii_pd_scsi_cmd_cdb(struct mfii_softc *,
+   struct scsi_xfer *);
 
 
 #define mfii_fw_state(_sc) mfii_read((_sc), MFI_OSP)
@@ -388,6 +445,8 @@ mfii_attach(struct device *parent, struc
 
config_found(sc-sc_dev, saa, scsiprint);
 
+   mfii_syspd(sc);
+
/* enable interrupts */
mfii_write(sc, MFI_OSTS, 0x);
mfii_write(sc, MFI_OMSK, ~MFII_OSTS_INTR_VALID);
@@ -406,6 +465,92 @@ pci_unmap:
 }
 
 int
+mfii_syspd(struct mfii_softc *sc)
+{
+   struct scsibus_attach_args saa;
+   struct scsi_link *link;
+   struct mfii_ld_map *lm;
+   struct 

Re: Missing include in sys/ipc.h

2014-09-20 Thread Mark Kettenis
 Date: Sat, 20 Sep 2014 18:15:31 +
 From: Miod Vallat m...@online.fr
 
  shmctl(2)/shmget(2)/shmat(2) all document
  
  #include <sys/types.h>
  #include <sys/ipc.h>
  #include <sys/shm.h>
  
  as a requirement for calling these functions.
 
 That was my first thought, but according to
 http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_ipc.h.html
 ``The <sys/ipc.h> header shall define the uid_t, gid_t, mode_t and key_t
 types as described in <sys/types.h>'', which is currently not the case.

Unfortunately it doesn't allow us to make everything in <sys/types.h>
available though.  So simply including <sys/types.h> from <sys/ipc.h>
isn't the right solution.



Re: [patch] puc(4) add Winchiphead CH382 support

2014-09-22 Thread Mark Kettenis
 Date: Tue, 23 Sep 2014 05:44:04 +0900
 From: SASANO Takayoshi u...@mx5.nisiq.net
 
 Hello,
 
 Here is the patch to support Winchiphead CH382 PCIe-UART device.
 I found the board on eBay with a cheap price tag.
 
 CH382 has three configurations and different PCI device ID.
 
   - 2 serial (2S)
   - 2 serial and 1 parallel (2S1P)
   - 1 parallel (1P)
 
 I have a 2S board; the patch does not support the parallel port, so a
 2S1P board will work as 2S.
 
 After patching, CH382 is recognized as 16750 like this.
 
 puc0 at pci3 dev 0 function 0 Nanjing QinHeng Electronics CH382 rev 0x10:
 ports: 2 com
 com4 at puc0 port 0 apic 5 int 17: ti16750, 64 byte fifo
 com5 at puc0 port 1 apic 5 int 17: ti16750, 64 byte fifo
 
 I referred the following pages:
   http://www.spinics.net/lists/linux-serial/msg11744.html
   http://kent-vandervelden.blogspot.jp/2014/08/linux-parallel-port-cards.html
 
 Can I commit?

ok kettenis@

 Regards,
 -- 
 SASANO Takayoshi u...@mx5.nisiq.net
 
 Index: pucdata.c
 ===
 RCS file: /cvs/src/sys/dev/pci/pucdata.c,v
 retrieving revision 1.93
 diff -u -p -r1.93 pucdata.c
 --- pucdata.c 13 Aug 2014 07:45:37 -  1.93
 +++ pucdata.c 21 Sep 2014 12:28:26 -
 @@ -2080,6 +2080,22 @@ const struct puc_device_description puc_
   { PUC_COM_POW2(0), 0x14, 0x },
   },
   },
 + {   /* WinChipHead CH382 (2S), */
 + {   PCI_VENDOR_WCH2, PCI_PRODUCT_WCH2_CH382_1,  0, 0},
 + {   0x, 0x, 0, 0},
 + {
 + { PUC_COM_POW2(0), 0x10, 0x00c0 },
 + { PUC_COM_POW2(0), 0x10, 0x00c8 },
 + },
 + },
 + {   /* WinChipHead CH382 (2S1P), */
 + {   PCI_VENDOR_WCH2, PCI_PRODUCT_WCH2_CH382_2,  0, 0},
 + {   0x, 0x, 0, 0},
 + {
 + { PUC_COM_POW2(0), 0x10, 0x00c0 },
 + { PUC_COM_POW2(0), 0x10, 0x00c8 },
 + },
 + },
   {   /* NetMos NM9820 UART */
   {   PCI_VENDOR_NETMOS, PCI_PRODUCT_NETMOS_NM9820,   0, 0},
   {   0x, 0x, 0, 0},
 
 



page zeroing

2014-09-26 Thread Mark Kettenis
The diff below adds a kernel thread that makes memory pages filled
with zeroes without holding the kernel lock.  The idea is that this
should speed up MP systems because the kernel can do some useful work
in parallel with other things, and could lower the latency on all
systems because (userland) memory page allocation will be faster.  The
thread runs at the absolutely lowest priority such that we only run it
if we don't have anything else to do.

But this could also slow down some systems though, because zeroing
pages can thrash the caches in your system.

So I'd like to see this diff tested on a wide variety of systems, and
hear back from people how this diff affects their OpenBSD systems.
Subjective reports are ok; hard benchmarks are better.

One thing to look at when you're running this diff is the output of
vmstat -s.  It will report something like:

2146380 zeroed page hits
  34258 zeroed page misses

You want the number of hits to be significantly larger than the number
of misses.  And you want the number of hits to keep growing.  Another
thing to look at is systat vm.  If you make your screen a bit bigger
than 25 lines, you'll see a pzidle counter which indicates how many
zeroed pages have been consumed (negative) or created (positive).

Thanks,

Mark

Index: uvm/uvm_extern.h
===
RCS file: /home/cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.119
diff -u -p -r1.119 uvm_extern.h
--- uvm/uvm_extern.h11 Jul 2014 16:35:40 -  1.119
+++ uvm/uvm_extern.h12 Jul 2014 19:02:23 -
@@ -519,6 +519,7 @@ voiduvm_vnp_sync(struct mount *);
 void   uvm_vnp_terminate(struct vnode *);
 boolean_t  uvm_vnp_uncache(struct vnode *);
 struct uvm_object  *uvn_attach(struct vnode *, vm_prot_t);
+void   uvm_pagezero_thread(void *);
 void   kmeminit_nkmempages(void);
 void   kmeminit(void);
 extern u_int   nkmempages;
Index: uvm/uvm_page.h
===
RCS file: /home/cvs/src/sys/uvm/uvm_page.h,v
retrieving revision 1.54
diff -u -p -r1.54 uvm_page.h
--- uvm/uvm_page.h  11 Jul 2014 16:35:40 -  1.54
+++ uvm/uvm_page.h  12 Jul 2014 19:02:23 -
@@ -296,7 +296,7 @@ int vm_physseg_find(paddr_t, int *);
 #define uvm_lock_fpageq()  mtx_enter(uvm.fpageqlock);
 #define uvm_unlock_fpageq()mtx_leave(uvm.fpageqlock);
 
-#defineUVM_PAGEZERO_TARGET (uvmexp.free)
+#defineUVM_PAGEZERO_TARGET (uvmexp.free / 8)
 
 #define VM_PAGE_TO_PHYS(entry) ((entry)-phys_addr)
 
Index: uvm/uvm_pmemrange.c
===
RCS file: /home/cvs/src/sys/uvm/uvm_pmemrange.c,v
retrieving revision 1.41
diff -u -p -r1.41 uvm_pmemrange.c
--- uvm/uvm_pmemrange.c 14 Sep 2014 14:17:27 -  1.41
+++ uvm/uvm_pmemrange.c 24 Sep 2014 15:29:00 -
@@ -21,6 +21,7 @@
 #include uvm/uvm.h
 #include sys/malloc.h
 #include sys/kernel.h
+#include sys/kthread.h
 #include sys/mount.h
 
 /*
@@ -107,7 +108,7 @@ voiduvm_pmr_assertvalid(struct uvm_pmem
 #endif
 
 int uvm_pmr_get1page(psize_t, int, struct pglist *,
-   paddr_t, paddr_t);
+   paddr_t, paddr_t, int);
 
 struct uvm_pmemrange   *uvm_pmr_allocpmr(void);
 struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int);
@@ -824,7 +825,7 @@ retry_desperate:
if (count = maxseg  align == 1  boundary == 0 
(flags  UVM_PLA_TRYCONTIG) == 0) {
fcount += uvm_pmr_get1page(count - fcount, memtype_init,
-   result, start, end);
+   result, start, end, 0);
 
/*
 * If we found sufficient pages, go to the succes exit code.
@@ -1036,6 +1037,8 @@ out:
 
if (found-pg_flags  PG_ZERO) {
uvmexp.zeropages--;
+   if (uvmexp.zeropages  UVM_PAGEZERO_TARGET)
+   wakeup(uvmexp.zeropages);
}
if (flags  UVM_PLA_ZERO) {
if (found-pg_flags  PG_ZERO)
@@ -1130,6 +1133,8 @@ uvm_pmr_freepages(struct vm_page *pg, ps
pg += pmr_count;
}
wakeup(uvmexp.free);
+   if (uvmexp.zeropages  UVM_PAGEZERO_TARGET)
+   wakeup(uvmexp.zeropages);
 
uvm_wakeup_pla(VM_PAGE_TO_PHYS(firstpg), ptoa(count));
 
@@ -1167,6 +1172,8 @@ uvm_pmr_freepageq(struct pglist *pgl)
uvm_wakeup_pla(pstart, ptoa(plen));
}
wakeup(uvmexp.free);
+   if (uvmexp.zeropages  UVM_PAGEZERO_TARGET)
+   wakeup(uvmexp.zeropages);
uvm_unlock_fpageq();
 
return;
@@ -1663,7 +1670,7 @@ uvm_pmr_rootupdate(struct uvm_pmemrange 
  */
 int
 uvm_pmr_get1page(psize_t count, int memtype_init, struct 

Re: page zeroing

2014-09-27 Thread Mark Kettenis
 Date: Fri, 26 Sep 2014 21:01:38 +0200 (CEST)
 From: Mark Kettenis mark.kette...@xs4all.nl
 
 The diff below adds a kernel thread that makes memory pages filled
 with zeroes without holding the kernel lock.  The idea is that this
 should speed up MP systems because the kernel can do some useful work
 in parallel with other things, and could lower the latency on all
 systems because (userland) memory page allocation will be faster.  The
 thread runs at the absolutely lowest priority such that we only run it
 if we don't have anything else to do.
 
 But this could also slow down some systems though, because zeroing
 pages can thrash the caches in your system.
 
 So I'd like to see this diff tested on a wide variety of systems, and
 hear back from people how this diff affects their OpenBSD systems.
 Subjective reports are ok; hard benchmarks are better.
 
 One thing to look at when you're running this diff is the output of
 vmstat -s.  It will report something like:
 
 2146380 zeroed page hits
   34258 zeroed page misses
 
 You want the number of hits to be significantly larger than the number
 of misses.  And you want the number of hits to keep growing.  Another
 thing to look at is systat vm.  If you make your screen a bit bigger
 than 25 lines, you'll see a pzidle counter which indicates how many
 zeroed pages have been consumed (negative) or created (positive).
 
 Thanks,
 
 Mark

Forgot to include a file in the diff.  Here is a complete one:

Index: kern/init_main.c
===
RCS file: /home/cvs/src/sys/kern/init_main.c,v
retrieving revision 1.217
diff -u -p -r1.217 init_main.c
--- kern/init_main.c14 Aug 2014 09:01:47 -  1.217
+++ kern/init_main.c2 Sep 2014 19:02:37 -
@@ -526,6 +526,10 @@ main(void *framep)
if (kthread_create(uvm_aiodone_daemon, NULL, NULL, aiodoned))
panic(fork aiodoned);
 
+   /* Create the page zeroing kernel thread. */
+   if (kthread_create(uvm_pagezero_thread, NULL, NULL, zerothread))
+   panic(fork zerothread);
+
 #if defined(MULTIPROCESSOR)
/* Boot the secondary processors. */
cpu_boot_secondary_processors();
Index: uvm/uvm_extern.h
===
RCS file: /home/cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.119
diff -u -p -r1.119 uvm_extern.h
--- uvm/uvm_extern.h11 Jul 2014 16:35:40 -  1.119
+++ uvm/uvm_extern.h12 Jul 2014 19:02:23 -
@@ -519,6 +519,7 @@ voiduvm_vnp_sync(struct mount *);
 void   uvm_vnp_terminate(struct vnode *);
 boolean_t  uvm_vnp_uncache(struct vnode *);
 struct uvm_object  *uvn_attach(struct vnode *, vm_prot_t);
+void   uvm_pagezero_thread(void *);
 void   kmeminit_nkmempages(void);
 void   kmeminit(void);
 extern u_int   nkmempages;
Index: uvm/uvm_page.h
===
RCS file: /home/cvs/src/sys/uvm/uvm_page.h,v
retrieving revision 1.54
diff -u -p -r1.54 uvm_page.h
--- uvm/uvm_page.h  11 Jul 2014 16:35:40 -  1.54
+++ uvm/uvm_page.h  12 Jul 2014 19:02:23 -
@@ -296,7 +296,7 @@ int vm_physseg_find(paddr_t, int *);
 #define uvm_lock_fpageq()  mtx_enter(uvm.fpageqlock);
 #define uvm_unlock_fpageq()mtx_leave(uvm.fpageqlock);
 
-#defineUVM_PAGEZERO_TARGET (uvmexp.free)
+#defineUVM_PAGEZERO_TARGET (uvmexp.free / 8)
 
 #define VM_PAGE_TO_PHYS(entry) ((entry)-phys_addr)
 
Index: uvm/uvm_pmemrange.c
===
RCS file: /home/cvs/src/sys/uvm/uvm_pmemrange.c,v
retrieving revision 1.41
diff -u -p -r1.41 uvm_pmemrange.c
--- uvm/uvm_pmemrange.c 14 Sep 2014 14:17:27 -  1.41
+++ uvm/uvm_pmemrange.c 24 Sep 2014 15:29:00 -
@@ -21,6 +21,7 @@
 #include uvm/uvm.h
 #include sys/malloc.h
 #include sys/kernel.h
+#include sys/kthread.h
 #include sys/mount.h
 
 /*
@@ -107,7 +108,7 @@ voiduvm_pmr_assertvalid(struct uvm_pmem
 #endif
 
 int uvm_pmr_get1page(psize_t, int, struct pglist *,
-   paddr_t, paddr_t);
+   paddr_t, paddr_t, int);
 
 struct uvm_pmemrange   *uvm_pmr_allocpmr(void);
 struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int);
@@ -824,7 +825,7 @@ retry_desperate:
if (count = maxseg  align == 1  boundary == 0 
(flags  UVM_PLA_TRYCONTIG) == 0) {
fcount += uvm_pmr_get1page(count - fcount, memtype_init,
-   result, start, end);
+   result, start, end, 0);
 
/*
 * If we found sufficient pages, go to the succes exit code.
@@ -1036,6 +1037,8 @@ out:
 
if (found-pg_flags  PG_ZERO) {
uvmexp.zeropages

nofault mappings

2014-09-30 Thread Mark Kettenis
The diff below introduces a new flag for mmap(2) that creates mappings
that cannot fault.  Normally, if you mmap a file, and your mapping is
larger than the mapped file, memory access to full pages beyond the
end of the file will fault.  Depending on the OS you will get a
SIGSEGV or SIGBUS and if you don't catch those, you die.  This is
especially nasty if you use file descriptor passing to share the file
descriptor with some other process and this other process ftruncates
the file without telling you.

The new xserver that matthieu@ just imported has the new xshm
extension which mmaps file descriptors passed by clients through file
descriptor passing.  To protect itself from being trivially DOSed by a
malicious (or careless) client, it keeps a list of mappings and
installs a SIGBUS signal handler that checks whether the fault address
matches any of these mappings.  In that case it mmaps a private
anonymous page on top of the faulting address and returns.  Since
OpenBSD generates SIGSEGV instead of SIGBUS in this case, this doesn't
work for us, so I made sure matthieu@ disabled this functionality for
now.  But the new xshm extension would actually be a nice thing to
have as it circumvents certain problems with the traditional xshm
extension that we have because of privsep.  And file descriptor
passing is also being used for DRI3 which we may want to support one
day.  Oh, and Wayland, which some people claim will replace X any day
now, heavily uses mapping file descriptors passed over sockets as
well.

We could of course change the xserver code to also trap SIGSEGV.  But
this workaround is rather ugly.  So my idea is to make X use this new
flag and disable the stupid busfault code.

The diff is remarkably simple.  We already have the infrastructure in
place to replace mapped pages with anons to support MAP_PRIVATE and
copy-on-write.  This diff simply leverages that infrastructure to
replace a page that can't be read from the underlying object by an
anonymous page.  Some open issues:

 * I need to check whether all combinations of flags actually make
   sense.  Should we only support __MAP_NOFAULT with non-anonymous
   mappings?

 * Should we only fixup the fault for access beyond the end of the
   mapped object (VM_PAGER_BAD) and still fault for actual IO errors
   (VM_PAGER_ERROR)?

 * Should the flag be exported without the leading underscores since
   we actually want to encourage its use?

Thoughts?


Index: sys/mman.h
===
RCS file: /cvs/src/sys/sys/mman.h,v
retrieving revision 1.26
diff -u -p -r1.26 mman.h
--- sys/mman.h  10 Jul 2014 19:00:23 -  1.26
+++ sys/mman.h  30 Sep 2014 20:34:42 -
@@ -58,8 +58,9 @@
 #define__MAP_NOREPLACE 0x0800  /* fail if address not available */
 #defineMAP_ANON0x1000  /* allocated from memory, swap space */
 #defineMAP_ANONYMOUS   MAP_ANON/* alternate POSIX spelling */
+#define__MAP_NOFAULT   0x2000
 
-#defineMAP_FLAGMASK0x1ff7
+#defineMAP_FLAGMASK0x3ff7
 
 #ifdef _KERNEL
 /*
Index: uvm/uvm.h
===
RCS file: /cvs/src/sys/uvm/uvm.h,v
retrieving revision 1.56
diff -u -p -r1.56 uvm.h
--- uvm/uvm.h   11 Jul 2014 16:35:40 -  1.56
+++ uvm/uvm.h   30 Sep 2014 21:03:43 -
@@ -90,7 +90,8 @@ struct uvm {
 #define UVM_ET_SUBMAP  0x02/* it is a vm_map submap */
 #define UVM_ET_COPYONWRITE 0x04/* copy_on_write */
 #define UVM_ET_NEEDSCOPY   0x08/* needs_copy */
-#defineUVM_ET_HOLE 0x10/* no backend */
+#define UVM_ET_HOLE0x10/* no backend */
+#define UVM_ET_NOFAULT 0x20/* don't fault */
 #define UVM_ET_FREEMAPPED  0x80/* map entry is on free list (DEBUG) */
 
 #define UVM_ET_ISOBJ(E)(((E)-etype  UVM_ET_OBJ) != 0)
@@ -98,6 +99,7 @@ struct uvm {
 #define UVM_ET_ISCOPYONWRITE(E)(((E)-etype  UVM_ET_COPYONWRITE) != 0)
 #define UVM_ET_ISNEEDSCOPY(E)  (((E)-etype  UVM_ET_NEEDSCOPY) != 0)
 #define UVM_ET_ISHOLE(E)   (((E)-etype  UVM_ET_HOLE) != 0)
+#define UVM_ET_ISNOFAULT(E)(((E)-etype  UVM_ET_NOFAULT) != 0)
 
 #ifdef _KERNEL
 
Index: uvm/uvm_extern.h
===
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.119
diff -u -p -r1.119 uvm_extern.h
--- uvm/uvm_extern.h11 Jul 2014 16:35:40 -  1.119
+++ uvm/uvm_extern.h30 Sep 2014 20:08:36 -
@@ -148,14 +148,15 @@ typedef int   vm_prot_t;
 #define UVM_ADV_MASK   0x7 /* mask */
 
 /* mapping flags */
-#define UVM_FLAG_FIXED   0x01 /* find space */
-#define UVM_FLAG_OVERLAY 0x02 /* establish overlay */
-#define UVM_FLAG_NOMERGE 0x04 /* don't merge map entries */
-#define UVM_FLAG_COPYONW 0x08 /* set copy_on_write flag */
-#define UVM_FLAG_AMAPPAD 0x10 /* for bss: pad amap to reduce malloc() 

Re: USB stack change needed for xhci(4)

2014-10-03 Thread Mark Kettenis
 Date: Thu, 2 Oct 2014 12:20:14 +0200
 From: Martin Pieuchot mpieuc...@nolizard.org
 
 Our USB stack contains a hack needed for ehci(4) and ohci(4) that
 breaks xhci(4).   The diff below moves this hack in these drivers,
 and makes it possible to have a working xhci(4) in GENERIC.
 
 I'd like this diff to be tested on as many machines as possible, because
 the code path it touches is very sensitive.  This also matters if you are
 using uhci(4)!
 
 Please test and report back.

Works fine for me on my old i386 Mac mini with usb keyboard and mouse
and two usb disks.  Diff makes sense to me as well.

ok kettenis@

 Index: ehci.c
 ===
 RCS file: /cvs/src/sys/dev/usb/ehci.c,v
 retrieving revision 1.168
 diff -u -p -r1.168 ehci.c
 --- ehci.c1 Sep 2014 08:13:02 -   1.168
 +++ ehci.c2 Oct 2014 09:30:28 -
 @@ -99,6 +99,7 @@ struct ehci_pipe {
  u_int8_t ehci_reverse_bits(u_int8_t, int);
  
  usbd_status  ehci_open(struct usbd_pipe *);
 +int  ehci_setaddr(struct usbd_device *, int);
  void ehci_poll(struct usbd_bus *);
  void ehci_softintr(void *);
  int  ehci_intr1(struct ehci_softc *);
 @@ -215,7 +216,7 @@ void  ehci_dump_exfer(struct ehci_xfer *
  
  struct usbd_bus_methods ehci_bus_methods = {
   .open_pipe = ehci_open,
 - .dev_setaddr = usbd_set_address,
 + .dev_setaddr = ehci_setaddr,
   .soft_intr = ehci_softintr,
   .do_poll = ehci_poll,
   .allocx = ehci_allocx,
 @@ -603,6 +604,40 @@ ehci_pcd(struct ehci_softc *sc, struct u
   xfer-status = USBD_NORMAL_COMPLETION;
  
   usb_transfer_complete(xfer);
 +}
 +
 +/*
 + * Work around the half configured control (default) pipe when setting
 + * the address of a device.
 + *
 + * Because a single QH is setup per endpoint in ehci_open(), and the
 + * control pipe is configured before we could have set the address
 + * of the device or read the wMaxPacketSize of the endpoint, we have
 + * to re-open the pipe twice here.
 + */
 +int
 +ehci_setaddr(struct usbd_device *dev, int addr)
 +{
 + /* Root Hub */
 + if (dev-depth == 0)
 + return (0);
 +
 + /* Re-establish the default pipe with the new max packet size. */
 + ehci_close_pipe(dev-default_pipe);
 + if (ehci_open(dev-default_pipe))
 + return (EINVAL);
 +
 + if (usbd_set_address(dev, addr))
 + return (1);
 +
 + dev-address = addr;
 +
 + /* Re-establish the default pipe with the new address. */
 + ehci_close_pipe(dev-default_pipe);
 + if (ehci_open(dev-default_pipe))
 + return (EINVAL);
 +
 + return (0);
  }
  
  void
 Index: ohci.c
 ===
 RCS file: /cvs/src/sys/dev/usb/ohci.c,v
 retrieving revision 1.139
 diff -u -p -r1.139 ohci.c
 --- ohci.c10 Aug 2014 11:18:57 -  1.139
 +++ ohci.c2 Oct 2014 09:33:03 -
 @@ -88,6 +88,7 @@ usbd_status ohci_alloc_std_chain(struct 
   struct ohci_soft_td **);
  
  usbd_status  ohci_open(struct usbd_pipe *);
 +int  ohci_setaddr(struct usbd_device *, int);
  void ohci_poll(struct usbd_bus *);
  void ohci_softintr(void *);
  void ohci_waitintr(struct ohci_softc *, struct usbd_xfer *);
 @@ -232,7 +233,7 @@ struct ohci_pipe {
  
  struct usbd_bus_methods ohci_bus_methods = {
   .open_pipe = ohci_open,
 - .dev_setaddr = usbd_set_address,
 + .dev_setaddr = ohci_setaddr,
   .soft_intr = ohci_softintr,
   .do_poll = ohci_poll,
   .allocx = ohci_allocx,
 @@ -2003,6 +2004,40 @@ ohci_open(struct usbd_pipe *pipe)
   bad0:
   return (USBD_NOMEM);
  
 +}
 +
 +/*
 + * Work around the half configured control (default) pipe when setting
 + * the address of a device.
 + *
 + * Because a single ED is setup per endpoint in ohci_open(), and the
 + * control pipe is configured before we could have set the address
 + * of the device or read the wMaxPacketSize of the endpoint, we have
 + * to re-open the pipe twice here.
 + */
 +int
 +ohci_setaddr(struct usbd_device *dev, int addr)
 +{
 + /* Root Hub */
 + if (dev-depth == 0)
 + return (0);
 +
 + /* Re-establish the default pipe with the new max packet size. */
 + ohci_device_ctrl_close(dev-default_pipe);
 + if (ohci_open(dev-default_pipe))
 + return (EINVAL);
 +
 + if (usbd_set_address(dev, addr))
 + return (1);
 +
 + dev-address = addr;
 +
 + /* Re-establish the default pipe with the new address. */
 + ohci_device_ctrl_close(dev-default_pipe);
 + if (ohci_open(dev-default_pipe))
 + return (EINVAL);
 +
 + return (0);
  }
  
  /*
 Index: usb_subr.c
 ===
 RCS file: /cvs/src/sys/dev/usb/usb_subr.c,v
 retrieving revision 1.109
 diff -u -p -r1.109 usb_subr.c
 --- usb_subr.c1 Oct 2014 

Re: nofault mappings

2014-10-03 Thread Mark Kettenis
 Date: Tue, 30 Sep 2014 23:12:10 +0200 (CEST)
 From: Mark Kettenis mark.kette...@xs4all.nl
 
 The diff below introduces a new flag for mmap(2) that creates mappings
 that cannot fault.  Normally, if you mmap a file, and your mapping is
 larger than the mapped file, memory access to full pages beyond the
 end of the file will fault.  Depending on the OS you will get a
 SIGSEGV or SIGBUS and if you don't catch those, you die.  This is
 especially nasty if you use file descriptor passing to share the file
 descriptor with some other process and this other process ftruncates
 the file without telling you.
 
 The new xserver that matthieu@ just imported has the new xshm
 extension which mmaps file descriptors passed by clients through file
 descriptor passing.  To protect itself from being trivially DOSed by a
 malicious (or careless) client, it keeps a list of mappings and
 installs a SIGBUS signal handler that checks whether the fault address
 matches any of these mappings.  In that case it mmaps a private
 anonymous page on top of the faulting address and returns.  Since
 OpenBSD generates SIGSEGV instead of SIGBUS in this case, this doesn't
 work for us, so I made sure matthieu@ disabled this functionality for
 now.  But the new xshm extension would actually be a nice thing to
 have as it circumvents certain problems with the traditional xshm
 extension that we have because of privsep.  And file descriptor
 passing is also being used for DRI3 which we may want to support one
 day.  Oh, and Wayland, which some people claim will replace X any day
 now, heavily uses mapping file descriptors passed over sockets as
 well.
 
 We could of course change the xserver code to also trap SIGSEGV.  But
 this workaround is rather ugly.  So my idea is to make X use this new
 flag and disable the stupid busfault code.
 
 The diff is remarkably simple.  We already have the infrastructure in
 place to replace mapped pages with anons to support MAP_PRIVATE and
 copy-on-write.  This diff simply leverages that infrastructure to
 replace a page that can't be read from the underlying object by an
 anonymous page.  Some open issues:
 
  * I need to check whether all combinations of flags actually make
sense.  Should we only support __MAP_NOFAULT with non-anonymous
mappings?
 
  * Should we only fixup the fault for access beyond the end of the
mapped object (VM_PAGER_BAD) and still fault for actual IO errors
(VM_PAGER_ERROR)?
 
  * Should the flag be exported without the leading underscores since
we actually want to encourage its use?
 
 Thoughts?

Even though this diff has been committed, I'm still interested in what
people think about the issues above.

 Index: sys/mman.h
 ===
 RCS file: /cvs/src/sys/sys/mman.h,v
 retrieving revision 1.26
 diff -u -p -r1.26 mman.h
 --- sys/mman.h10 Jul 2014 19:00:23 -  1.26
 +++ sys/mman.h30 Sep 2014 20:34:42 -
 @@ -58,8 +58,9 @@
  #define  __MAP_NOREPLACE 0x0800  /* fail if address not available */
  #define  MAP_ANON0x1000  /* allocated from memory, swap space */
  #define  MAP_ANONYMOUS   MAP_ANON/* alternate POSIX spelling */
 +#define  __MAP_NOFAULT   0x2000
  
 -#define  MAP_FLAGMASK0x1ff7
 +#define  MAP_FLAGMASK0x3ff7
  
  #ifdef _KERNEL
  /*
 Index: uvm/uvm.h
 ===
 RCS file: /cvs/src/sys/uvm/uvm.h,v
 retrieving revision 1.56
 diff -u -p -r1.56 uvm.h
 --- uvm/uvm.h 11 Jul 2014 16:35:40 -  1.56
 +++ uvm/uvm.h 30 Sep 2014 21:03:43 -
 @@ -90,7 +90,8 @@ struct uvm {
  #define UVM_ET_SUBMAP0x02/* it is a vm_map submap */
  #define UVM_ET_COPYONWRITE   0x04/* copy_on_write */
  #define UVM_ET_NEEDSCOPY 0x08/* needs_copy */
 -#define  UVM_ET_HOLE 0x10/* no backend */
 +#define UVM_ET_HOLE  0x10/* no backend */
 +#define UVM_ET_NOFAULT   0x20/* don't fault */
  #define UVM_ET_FREEMAPPED0x80/* map entry is on free list (DEBUG) */
  
  #define UVM_ET_ISOBJ(E)  (((E)-etype  UVM_ET_OBJ) != 0)
 @@ -98,6 +99,7 @@ struct uvm {
  #define UVM_ET_ISCOPYONWRITE(E)  (((E)-etype  UVM_ET_COPYONWRITE) != 0)
  #define UVM_ET_ISNEEDSCOPY(E)(((E)-etype  UVM_ET_NEEDSCOPY) != 0)
  #define UVM_ET_ISHOLE(E) (((E)-etype  UVM_ET_HOLE) != 0)
 +#define UVM_ET_ISNOFAULT(E)  (((E)-etype  UVM_ET_NOFAULT) != 0)
  
  #ifdef _KERNEL
  
 Index: uvm/uvm_extern.h
 ===
 RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
 retrieving revision 1.119
 diff -u -p -r1.119 uvm_extern.h
 --- uvm/uvm_extern.h  11 Jul 2014 16:35:40 -  1.119
 +++ uvm/uvm_extern.h  30 Sep 2014 20:08:36 -
 @@ -148,14 +148,15 @@ typedef int vm_prot_t;
  #define UVM_ADV_MASK 0x7 /* mask */
  
  /* mapping flags */
 -#define UVM_FLAG_FIXED

Re: implement CLOCK_VIRTUAL and CLOCK_PROF

2014-10-15 Thread Mark Kettenis
 From: Todd C. Miller todd.mil...@courtesan.com
 Date: Wed, 15 Oct 2014 11:37:26 -0600
 
 Since this came up in another thread.  Trivial implementations of
 CLOCK_VIRTUAL and CLOCK_PROF, modeled after what FreeBSD does.

Shouldn't this do a tuagg() on all the threads of the process like we
do for getrusage?  Otherwise the CLOCK_VIRTUAL and CLOCK_PROF clocks
will only be updated upon a context switch.

Also, you should add support for these to clock_getres(2).

Oh, and documenting CLOCK_PROF in the man page would be good.

 Index: sys/sys/_time.h
 ===
 RCS file: /home/cvs/openbsd/src/sys/sys/_time.h,v
 retrieving revision 1.6
 diff -u -r1.6 _time.h
 --- sys/sys/_time.h   6 Oct 2013 01:27:49 -   1.6
 +++ sys/sys/_time.h   15 Oct 2014 13:40:28 -
 @@ -38,6 +38,7 @@
  #define CLOCK_MONOTONIC  3
  #define CLOCK_THREAD_CPUTIME_ID  4
  #define CLOCK_UPTIME 5
 +#define CLOCK_PROF   6
  
  #if __BSD_VISIBLE
  #define  __CLOCK_USE_TICKET_LOCKS8   /* flag for 
 __thrsleep() */
 Index: sys/kern/kern_time.c
 ===
 RCS file: /home/cvs/openbsd/src/sys/kern/kern_time.c,v
 retrieving revision 1.88
 diff -u -r1.88 kern_time.c
 --- sys/kern/kern_time.c  15 May 2014 04:36:33 -  1.88
 +++ sys/kern/kern_time.c  15 Oct 2014 14:31:06 -
 @@ -108,12 +108,20 @@
  int
  clock_gettime(struct proc *p, clockid_t clock_id, struct timespec *tp)
  {
 + struct timespec sys;
   struct bintime bt;
   struct proc *q;
  
   switch (clock_id) {
   case CLOCK_REALTIME:
   nanotime(tp);
 + break;
 + case CLOCK_VIRTUAL:
 + calctsru(p-p_p-ps_tu, tp, sys, NULL);
 + break;
 + case CLOCK_PROF:
 + calctsru(p-p_p-ps_tu, tp, sys, NULL);
 + timespecadd(tp, sys, tp);
   break;
   case CLOCK_UPTIME:
   binuptime(bt);
 
 



Re: implement CLOCK_VIRTUAL and CLOCK_PROF

2014-10-15 Thread Mark Kettenis
 Date: Wed, 15 Oct 2014 20:18:10 +0200 (CEST)
 From: Mark Kettenis mark.kette...@xs4all.nl
 
  From: Todd C. Miller todd.mil...@courtesan.com
  Date: Wed, 15 Oct 2014 11:37:26 -0600
  
  Since this came up in another thread.  Trivial implementations of
  CLOCK_VIRTUAL and CLOCK_PROF, modeled after what FreeBSD does.
 
 Shouldn't this do a tuagg() on all the threads of the process like we
 do for getrusage?  Otherwise the CLOCK_VIRTUAL and CLOCK_PROF clocks
 will only be updated upon a context switch.
 
 Also, you should add support for these to clock_getres(2).
 
 Oh, and documenting CLOCK_PROF in the man page would be good.

Hmm, looking at the FreeBSD man page... isn't CLOCK_PROF the same
thing as CLOCK_PROCESS_CPUTIME_ID?



Re: implement CLOCK_VIRTUAL and CLOCK_PROF

2014-10-15 Thread Mark Kettenis
 Date: Wed, 15 Oct 2014 20:36:33 +0200 (CEST)
 From: Mark Kettenis mark.kette...@xs4all.nl
 
  Date: Wed, 15 Oct 2014 20:18:10 +0200 (CEST)
  From: Mark Kettenis mark.kette...@xs4all.nl
  
   From: Todd C. Miller todd.mil...@courtesan.com
   Date: Wed, 15 Oct 2014 11:37:26 -0600
   
   Since this came up in another thread.  Trivial implementations of
   CLOCK_VIRTUAL and CLOCK_PROF, modeled after what FreeBSD does.
  
  Shouldn't this do a tuagg() on all the threads of the process like we
  do for getrusage?  Otherwise the CLOCK_VIRTUAL and CLOCK_PROF clocks
  will only be updated upon a context switch.
  
  Also, you should add support for these to clock_getres(2).
  
  Oh, and documenting CLOCK_PROF in the man page would be good.
 
 Hmm, looking at the FreeBSD man page... isn't CLOCK_PROF the same
 thing as CLOCK_PROCESS_CPUTIME_ID?

Oh, and while FreeBSD seems to implement CLOCK_VIRTUAL and CLOCK_PROF
as per-process, Solaris implements them as per-thread (but doesn't
document them).  And on Solaris CLOCK_PROF is just an alias for
CLOCK_THREAD_CPUTIME_ID.

Starting to doubt if we really want to implement these...



Re: implement CLOCK_VIRTUAL and CLOCK_PROF

2014-10-16 Thread Mark Kettenis
 From: Todd C. Miller todd.mil...@courtesan.com
 Date: Thu, 16 Oct 2014 07:33:23 -0600
 
 On Wed, 15 Oct 2014 21:50:44 -0700, Philip Guenther wrote:
 
  IMO we should just delete CLOCK_VIRTUAL from sys/_time.h and 
  clock_gettime(2)
 
 Easy enough.

ok kettenis@

 Index: sys/sys/_time.h
 ===
 RCS file: /home/cvs/openbsd/src/sys/sys/_time.h,v
 retrieving revision 1.6
 diff -u -r1.6 _time.h
 --- sys/sys/_time.h   6 Oct 2013 01:27:49 -   1.6
 +++ sys/sys/_time.h   16 Oct 2014 12:47:02 -
 @@ -33,7 +33,6 @@
  #define _SYS__TIME_H_
  
  #define CLOCK_REALTIME   0
 -#define CLOCK_VIRTUAL1
  #define CLOCK_PROCESS_CPUTIME_ID 2
  #define CLOCK_MONOTONIC  3
  #define CLOCK_THREAD_CPUTIME_ID  4
 Index: lib/libc/sys/clock_gettime.2
 ===
 RCS file: /home/cvs/openbsd/src/lib/libc/sys/clock_gettime.2,v
 retrieving revision 1.24
 diff -u -r1.24 clock_gettime.2
 --- lib/libc/sys/clock_gettime.2  21 Jan 2014 03:15:45 -  1.24
 +++ lib/libc/sys/clock_gettime.2  16 Oct 2014 12:43:33 -
 @@ -62,9 +62,6 @@
  .Bl -tag -width CLOCK_MONOTONIC
  .It Dv CLOCK_REALTIME
  time that increments as a wall clock should
 -.It Dv CLOCK_VIRTUAL
 -time that increments only when
 -the CPU is running in user mode on behalf of the calling process
  .It Dv CLOCK_PROCESS_CPUTIME_ID
  time that increments when the CPU is running in user or kernel mode
  on behalf of the calling process
 
 



Conditional include in make(1)

2014-10-16 Thread Mark Kettenis
Hi Marc,

Is there a reason why conditional includes (sinclude/-include) aren't
enabled in OpenBSD?

I'm asking because the Xorg people now use it in one of the xserver
Makefiles.  We could of course try to convince them to revert the
change they made.  But it is a somewhat useful feature.

Thanks,

Mark



Re: SPARC64: suggested fixes for OF interface

2014-10-18 Thread Mark Kettenis
 Date: Thu, 02 Oct 2014 14:33:22 +0100
 From: Mark Cave-Ayland mark.cave-ayl...@ilande.co.uk
 
 Hi all,
 
  From my work on running OpenBSD under OpenBIOS/QEMU, I found a couple 
 of bugs in the NetBSD OF bindings for SPARC64 which also seem to be 
 relevant to OpenBSD. I've applied patches to OpenBIOS to compensate for 
 these bugs which allows OpenBSD to boot under QEMU, but thought that as 
 there is interest here it would be worth documenting them for the sake 
 of correctness.

Thanks!  Both issues have been addressed now.



Re: Conditional include in make(1)

2014-10-19 Thread Mark Kettenis
 Date: Sun, 19 Oct 2014 09:25:51 +0200
 From: Matthieu Herrb matth...@herrb.eu
 
 On Thu, Oct 16, 2014 at 08:14:16PM +0200, Mark Kettenis wrote:
  Hi Marc,
  
  Is there a reason why conditional includes (sinclude/-include) aren't
  enabled in OpenBSD?
  
  I'm asking because the Xorg people now use it in one of the xserver
  Makefile.  We could of course try to convince them to revert the
  change they made.  But it is a somewhat useful feature.
 
 So, now that espie@ has committed the bit, ok to remove the local
 change that made the inclusion unconditional (and slightly broke make
 clean in xserver) ?

That's exactly why I wanted it.

ok kettenis@

 Index: Makefile.am
 ===
 RCS file: /cvs/OpenBSD/xenocara/xserver/hw/xfree86/Makefile.am,v
 retrieving revision 1.9
 diff -u -r1.9 Makefile.am
 --- Makefile.am   27 Sep 2014 17:53:01 -  1.9
 +++ Makefile.am   19 Oct 2014 07:23:22 -
 @@ -138,7 +138,7 @@
   $(AM_V_GEN)CPP='$(CPP)' AWK='$(AWK)' $(SHELL) $(srcdir)/sdksyms.sh 
 $(top_srcdir) $(CFLAGS) $(AM_CFLAGS) $(AM_CPPFLAGS)
  
  SDKSYMS_DEP = sdksyms.dep
 -include $(SDKSYMS_DEP)
 +-include $(SDKSYMS_DEP)
  
  i2c/libi2c.la:
   $(AM_V_at)cd i2c  $(MAKE) libi2c.la
 Index: Makefile.in
 ===
 RCS file: /cvs/OpenBSD/xenocara/xserver/hw/xfree86/Makefile.in,v
 retrieving revision 1.29
 diff -u -r1.29 Makefile.in
 --- Makefile.in   27 Sep 2014 17:53:01 -  1.29
 +++ Makefile.in   19 Oct 2014 07:23:22 -
 @@ -1116,7 +1116,7 @@
  
  sdksyms.dep sdksyms.c: sdksyms.sh
   $(AM_V_GEN)CPP='$(CPP)' AWK='$(AWK)' $(SHELL) $(srcdir)/sdksyms.sh 
 $(top_srcdir) $(CFLAGS) $(AM_CFLAGS) $(AM_CPPFLAGS)
 -include $(SDKSYMS_DEP)
 +-include $(SDKSYMS_DEP)
  
  i2c/libi2c.la:
   $(AM_V_at)cd i2c  $(MAKE) libi2c.la
 
 -- 
 Matthieu Herrb
 



Re: make release fails if SUDO is set in mk.conf

2014-10-24 Thread Mark Kettenis
 Date: Fri, 24 Oct 2014 08:35:40 +0200
 From: Landry Breuil lan...@rhaalovely.net
 
 On Fri, Oct 24, 2014 at 02:34:54AM -0400, thev...@openmailbox.org wrote:
  with SUDO set in /etc/mk.conf:
if make release is run as root it will not proceed.
if run as a regular user it gets further, but fails on permissions.
  
  without SUDO in /etc/mk.conf (and i presume the environment) it works fine.
  
  is there any way around this allowing /etc/mk.conf (which is useful for 
  ports)?
  i can always move it temporarily, add it to my automated scripts, but is 
  there
  a better way?
  
  
  $ cat /etc/mk.conf
  SUDO=/usr/bin/sudo
 
 I think (and this is probably somewhere in the docs) you should use sudo -E.

-E is a rather dangerous sudo option.



Re: strings, change default to -a?

2014-10-25 Thread Mark Kettenis
 Date: Sat, 25 Oct 2014 13:18:04 +0100
 From: Stuart Henderson st...@openbsd.org
 
 http://seclists.org/oss-sec/2014/q4/445
 
 Any thoughts on changing strings(1) to use -a by default, to avoid
 libbfd parsing, and add a new option to allow previous behaviour for
 people who want it?
 
 About -a, posix says "Scan files in their entirety. If -a is not
 specified, it is implementation-defined what portion of each file is
 scanned for strings."
 
 http://pubs.opengroup.org/onlinepubs/9699919799/utilities/strings.html#tag_20_121_04

Unless upstream makes a similar change, I don't think we should do this.

 Index: binutils/strings.c
 ===
 RCS file: /cvs/src/gnu/usr.bin/binutils/binutils/strings.c,v
 retrieving revision 1.8
 diff -u -p -r1.8 strings.c
 --- binutils/strings.c31 Aug 2014 13:40:02 -  1.8
 +++ binutils/strings.c25 Oct 2014 12:01:00 -
 @@ -174,17 +174,21 @@ main (int argc, char **argv)
string_min = -1;
print_addresses = FALSE;
print_filenames = FALSE;
 -  datasection_only = TRUE;
 +  datasection_only = FALSE;
target = NULL;
encoding = 's';
  
 -  while ((optc = getopt_long (argc, argv, afhHn:ot:e:Vv0123456789,
 +  while ((optc = getopt_long (argc, argv, aAfhHn:ot:e:Vv0123456789,
 long_options, (int *) 0)) != EOF)
  {
switch (optc)
   {
   case 'a':
 datasection_only = FALSE;
 +   break;
 +
 + case 'A':
 +   datasection_only = TRUE;
 break;
  
   case 'f':
 Index: binutils/doc/strings.1
 ===
 RCS file: /cvs/src/gnu/usr.bin/binutils/binutils/doc/strings.1,v
 retrieving revision 1.1.1.2
 diff -u -p -r1.1.1.2 strings.1
 --- binutils/doc/strings.12 Nov 2004 20:22:07 -   1.1.1.2
 +++ binutils/doc/strings.125 Oct 2014 12:01:00 -
 @@ -133,7 +133,7 @@
  strings \- print the strings of printable characters in files.
  .SH SYNOPSIS
  .IX Header SYNOPSIS
 -strings [\fB\-afov\fR] [\fB\-\fR\fImin-len\fR]
 +strings [\fB\-Aafov\fR] [\fB\-\fR\fImin-len\fR]
  [\fB\-n\fR \fImin-len\fR] [\fB\-\-bytes=\fR\fImin-len\fR]
  [\fB\-t\fR \fIradix\fR] [\fB\-\-radix=\fR\fIradix\fR]
  [\fB\-e\fR \fIencoding\fR] [\fB\-\-encoding=\fR\fIencoding\fR]
 @@ -163,6 +163,10 @@ files.
  .PD
  Do not scan only the initialized and loaded sections of object files;
  scan the whole files.
 +This is the default in this version.
 +.IP \fB\-A\fR 4
 +.IX Item -A
 +Scan only the initialized and loaded sections of object files.
  .IP \fB\-f\fR 4
  .IX Item -f
  .PD 0
 
 
 



Check PCI ROM addresses for conflicts

2014-10-25 Thread Mark Kettenis
This diff checks whether the PCI ROMs have been assigned sensible
addresses.  If not it resets the address to 0 such that drivers that
want to map the ROM can assign a suitable address themselves.  This
replicates what we have been doing for PCI BARs for the last couple of
years.

This should fix issues with some ATI/AMD Radeon cards behind bridges
on machines with broken firmware.

ok?


Index: pci.c
===
RCS file: /cvs/src/sys/dev/pci/pci.c,v
retrieving revision 1.105
diff -u -p -r1.105 pci.c
--- pci.c   14 Sep 2014 14:17:25 -  1.105
+++ pci.c   25 Oct 2014 13:59:28 -
@@ -797,12 +797,14 @@ pci_reserve_resources(struct pci_attach_
pci_chipset_tag_t pc = pa-pa_pc;
pcitag_t tag = pa-pa_tag;
pcireg_t bhlc, blr, type, bir;
+   pcireg_t addr, mask;
bus_addr_t base, limit;
bus_size_t size;
-   int reg, reg_start, reg_end;
+   int reg, reg_start, reg_end, reg_rom;
int bus, dev, func;
int sec, sub;
int flags;
+   int s;
 
pci_decompose_tag(pc, tag, bus, dev, func);
 
@@ -811,14 +813,17 @@ pci_reserve_resources(struct pci_attach_
case 0:
reg_start = PCI_MAPREG_START;
reg_end = PCI_MAPREG_END;
+   reg_rom = PCI_ROM_REG;
break;
case 1: /* PCI-PCI bridge */
reg_start = PCI_MAPREG_START;
reg_end = PCI_MAPREG_PPB_END;
+   reg_rom = 0;/* 0x38 */
break;
case 2: /* PCI-CardBus bridge */
reg_start = PCI_MAPREG_START;
reg_end = PCI_MAPREG_PCB_END;
+   reg_rom = 0;
break;
default:
return (0);
@@ -863,6 +868,28 @@ pci_reserve_resources(struct pci_attach_
 
if (type  PCI_MAPREG_MEM_TYPE_64BIT)
reg += 4;
+   }
+
+   if (reg_rom != 0) {
+   s = splhigh();
+   addr = pci_conf_read(pc, tag, PCI_ROM_REG);
+   pci_conf_write(pc, tag, PCI_ROM_REG, ~PCI_ROM_ENABLE);
+   mask = pci_conf_read(pc, tag, PCI_ROM_REG);
+   pci_conf_write(pc, tag, PCI_ROM_REG, addr);
+   splx(s);
+
+   base = PCI_ROM_ADDR(addr);
+   size = PCI_ROM_SIZE(mask);
+   if (base != 0  size != 0) {
+   if (pa-pa_pmemex  extent_alloc_region(pa-pa_pmemex,
+   base, size, EX_NOWAIT) 
+   pa-pa_memex  extent_alloc_region(pa-pa_memex,
+   base, size, EX_NOWAIT)) {
+   printf(%d:%d:%d: mem address conflict 
0x%lx/0x%lx\n,
+   bus, dev, func, base, size);
+   pci_conf_write(pc, tag, PCI_ROM_REG, 0);
+   }
+   }
}
 
if (PCI_HDRTYPE_TYPE(bhlc) != 1)



Re: have indent default to stdin with no input files

2013-06-20 Thread Mark Kettenis
 Date: Thu, 20 Jun 2013 17:03:48 +1000
 From: Jonathan Gray j...@jsg.id.au
 
 Default to stdin/stdout if no input files are
 given to indent.  FreeBSD and GNU indent have had
 this behaviour for a long time now.
 
 Based on FreeBSD svn rev 40502 from back in 1998.

Makes sense to me.

ok kettenis@

 Index: indent.1
 ===
 RCS file: /cvs/src/usr.bin/indent/indent.1,v
 retrieving revision 1.21
 diff -u -p -r1.21 indent.1
 --- indent.1  20 Jun 2013 06:28:15 -  1.21
 +++ indent.1  20 Jun 2013 06:50:52 -
 @@ -41,7 +41,7 @@
  .Sh SYNOPSIS
  .Nm indent
  .Bk -words
 -.Ar input-file Op Ar output-file
 +.Op Ar input-file Op Ar output-file
  .Op Fl bad | nbad
  .Op Fl bap | nbap
  .Op Fl bbb | nbbb
 @@ -114,6 +114,11 @@ is specified,
  .Nm
  checks to make sure it is different from
  .Ar input-file .
 +.Pp
 +If no
 +.Ar input-file
 +is specified
 +input is read from stdin and the formatted file is written to stdout. 
  .Pp
  The options listed below control the formatting style imposed by
  .Nm .
 Index: indent.c
 ===
 RCS file: /cvs/src/usr.bin/indent/indent.c,v
 retrieving revision 1.21
 diff -u -p -r1.21 indent.c
 --- indent.c  20 Jun 2013 06:28:15 -  1.21
 +++ indent.c  20 Jun 2013 06:50:52 -
 @@ -198,11 +198,10 @@ main(int argc, char **argv)
   set_option(argv[i]);
  }/* end of for */
  if (input == NULL) {
 - fprintf(stderr, usage: indent input-file [output-file] [options]\n);
 - exit(1);
 + input = stdin;
  }
  if (output == NULL) {
 - if (troff)
 + if (troff || input == stdin)
   output = stdout;
   else {
   out_name = in_name;
 
 



Re: help X11 performance: make sigprocmask(2) SY_NOLOCK

2013-06-20 Thread Mark Kettenis
 Date: Thu, 20 Jun 2013 11:30:11 +0100
 From: Stuart Henderson st...@openbsd.org
 
 On 2013/06/20 09:38, Otto Moerbeek wrote:
  On Wed, Jun 19, 2013 at 07:39:15PM +, Miod Vallat wrote:
  
  -   p-p_sigmask = mask ~ sigcantmask;
  +   p-p_sigmask = mask;

On the right architecture where a word store isn't atomic enough and
with the right compiler that decides to put p_sigmask on an address
ending with 0xFFF with 4k-sized pages, we have two problems already.
   
   Holy Pumpkin forbid, struct proc layout is sane enough for fields to be
   aligned to their natural alignment, and the allocator will return
   properly aligned structs as well.
   
I'm only asking if such a situation can happen, or if there is some
ensure_this_assignment_is_always_atomic(p-p_sigmask, mask); function
that I missed.
   
   If such a function exists, it should be shot, if only for having a too
   long name.
   
   Miod
  
  But watch out, as kettenis@ already mentioned elsewhere, this can only
  be safe if *all* reads and modifications to the mask are done in an
  atomic way. 
  
  Straight read and assignment of int values is safe, afaik, but e.g.
  struct assignment and memcpy are *not* atomic. 

And p_sigmask is copied during fork, so that needs a bit of thought.
I guess it doesn't matter for the new child, as it isn't running yet
and therefore can't invoke sigprocmask(2).  And the parent should be
safe as well, as it is in fork(2) and therefore can't call
sigprocmask(2) either.

 I spotted a few places where p_sigmask is &= or |='d, for example these
 from linux compat, might these be an issue?
 
 p->p_sigmask &= ~bs;
 p->p_sigmask |= bs & ~sigcantmask;

There's also a couple of additional p->p_sigmask |= ... in kern_sig.c.

All of these need to become atomic_setbits_int/atomic_clearbits_int calls.



Re: help X11 performance: make sigprocmask(2) SY_NOLOCK

2013-06-20 Thread Mark Kettenis
 Date: Wed, 19 Jun 2013 13:40:19 +0200
 From: Martin Pelikan martin.peli...@gmail.com
 
  If you're right that atomic_{clear,set}bits_int is correct and
  sufficient and actually faster, then all dynamic executables would
  benefit from this speedup (sigprocmask is used in ld.so(1)).
 
 Since on i386 GENERIC these atomic_* things don't emit the LOCK prefix,
 performance shouldn't be an issue; I'm actually more worried about this bit:
 
   - p-p_sigmask = mask ~ sigcantmask;
   + p-p_sigmask = mask;
 
 On the right architecture where a word store isn't atomic enough and
 with the right compiler that decides to put p_sigmask on an address
 ending with 0xFFF with 4k-sized pages, we have two problems already.
 
 I'm only asking if such a situation can happen, or if there is some
 ensure_this_assignment_is_always_atomic(p-p_sigmask, mask); function
 that I missed.

There isn't one.  Unfortunately, we might need one to support SMP on
hppa, where we have to emulate atomic operations using a lock.  In
fact you need to worry about reading as well.  And since ptsignal()
looks at the p_sigmask of other threads in the process, it is not
immediately obvious there isn't a problem here.

I think the fact that only the thread itself can change its sigmask
means that there isn't an issue here.  But that probably means that
bothering with atomic_setbits_int/atomic_clearbits_int isn't necessary
in the first place.



Re: help X11 performance: make sigprocmask(2) SY_NOLOCK

2013-06-20 Thread Mark Kettenis
 Date: Thu, 20 Jun 2013 14:25:49 +0300
 From: Paul Irofti p...@irofti.net
 
  +   bs = ~sigcantmask;
  switch (SCARG(uap, how)) {
  case LINUX_SIG_BLOCK:
  -   p-p_sigmask |= bs  ~sigcantmask;
  +   atomic_setbits_int(p-p_sigmask, bs);
  break;
   
  case LINUX_SIG_UNBLOCK:
  -   p-p_sigmask = ~bs;
  +   atomic_clearbits_int(p-p_sigmask, bs);
 
 I'm pretty sure this is not correct with your change to bs before the
 switch statement.

No that's fine.  The bits in sigcantmask should never be set.  So

atomic_clearbits_int(&p->p_sigmask, bs & ~sigcantmask);

is equivalent to

atomic_clearbits_int(&p->p_sigmask, bs);



Re: binary integer constants in gcc

2013-06-21 Thread Mark Kettenis
 Date: Fri, 21 Jun 2013 10:50:42 +0200
 From: Landry Breuil lan...@rhaalovely.net
 
 On Fri, Jun 21, 2013 at 10:20:01AM +0200, Mark Kettenis wrote:
  
  Well, lots of ports stuff is compiled with newer gcc versions anyway.
 
 Actually, not so many:
 
 $ echo "select count(*) from modules where value='gcc4';" | 
 sqlite3 /usr/local/share/sqlports 
 34
 
 And if you rip out all the subpackages, the actual list is:
 
 audio/mscore
 editors/libreoffice
 lang/classpath
 lang/luajit
 net/rtorrent
 print/cups-filters
 textproc/pdftk
 www/mozilla-firefox
 www/seamonkey
 www/squid

Don't libreoffice and mozilla-firefox account for about half the lines
of code that's in ports? ;)

Seriously though; I was under the impression that it was a lot more.
Thanks for enlightening me Landry.



Re: Removing -Wno-format from kernel makefiles, 1/16

2013-07-03 Thread Mark Kettenis
 Date: Wed, 3 Jul 2013 16:35:24 +0200 (CEST)
 From: Stefan Fritsch s...@sfritsch.de
 
 add support for %td for ptrdiff_t in kernel
 
 this also adds support in gcc 4.x kprintf

I'm on the fence about the CTASSERT here.  If we ever support a code
model that's not ILP32 or LP64, we need a major overhaul of the code
base.  So I don't think it adds real value.

so ok kettenis@ with or without that CTASSERT.

 ---
  gnu/gcc/gcc/c-format.c |7 ---
  sys/kern/subr_prf.c|6 ++
  2 files changed, 10 insertions(+), 3 deletions(-)
 
 diff --git gnu/gcc/gcc/c-format.c gnu/gcc/gcc/c-format.c
 index b9eecee..1b1734b 100644
 --- gnu/gcc/gcc/c-format.c
 +++ gnu/gcc/gcc/c-format.c
 @@ -325,6 +325,7 @@ static const format_length_info kprintf_length_specs[] =
{ l, FMT_LEN_l, STD_C89, ll, FMT_LEN_ll, STD_C9L },
{ q, FMT_LEN_ll, STD_EXT, NULL, 0, 0 },
{ z, FMT_LEN_z, STD_C99, NULL, 0, 0 },
 +  { t, FMT_LEN_t, STD_C99, NULL, 0, 0 },
{ NULL, 0, 0, NULL, 0, 0 }
  };
  
 @@ -552,9 +553,9 @@ static const format_char_info asm_fprintf_char_table[] =
  static const format_char_info kprint_char_table[] = 
  { 
/* C89 conversion specifiers.  */ 
 -  { di,  0, STD_C89, { T89_I,   BADLEN, T89_S,   T89_L,   T9L_LL,  BADLEN, 
  T99_SST, BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, -wp0 +'I, i, NULL 
 }, 
 -  { oxX, 0, STD_C89, { T89_UI,  BADLEN, T89_US,  T89_UL,  T9L_ULL, BADLEN, 
  T99_ST,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, -wp0#,i, NULL 
 }, 
 -  { u,   0, STD_C89, { T89_UI,  BADLEN, T89_US,  T89_UL,  T9L_ULL, BADLEN, 
  T99_ST,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, -wp0'I,   i, NULL 
 }, 
 +  { di,  0, STD_C89, { T89_I,   BADLEN, T89_S,   T89_L,   T9L_LL,  BADLEN, 
  T99_SST, T99_PD,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, -wp0 +'I, i, NULL 
 }, 
 +  { oxX, 0, STD_C89, { T89_UI,  BADLEN, T89_US,  T89_UL,  T9L_ULL, BADLEN, 
  T99_ST,  T99_UPD,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, -wp0#,i, 
 NULL }, 
 +  { u,   0, STD_C89, { T89_UI,  BADLEN, T89_US,  T89_UL,  T9L_ULL, BADLEN, 
  T99_ST,  T99_UPD,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, -wp0'I,   i, 
 NULL }, 
{ c,   0, STD_C89, { T89_I,   BADLEN, BADLEN,  BADLEN,  BADLEN,  BADLEN, 
  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, -w,   , NULL 
 }, 
{ s,   1, STD_C89, { T89_C,   BADLEN, BADLEN,  BADLEN,  BADLEN,  BADLEN, 
  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, -wp,  cR, 
 NULL }, 
{ p,   1, STD_C89, { T89_V,   BADLEN, BADLEN,  T89_UL,  T9L_LL,  BADLEN, 
  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN }, -wp0, c, NULL 
 }, 
 diff --git sys/kern/subr_prf.c sys/kern/subr_prf.c
 index 768d164..c940141 100644
 --- sys/kern/subr_prf.c
 +++ sys/kern/subr_prf.c
 @@ -842,6 +842,12 @@ reswitch:switch (ch) {
   size = 1;
   sign = '\0';
   break;
 + case 't':
 + {
 + /* assume ptrdiff_t is long */
 + CTASSERT(sizeof(fmt - fmt0) == sizeof(long));
 + }
 + /* FALLTHROUGH */
   case 'D':
   flags |= LONGINT;
   /*FALLTHROUGH*/
 
 



Re: Removing -Wno-format from kernel makefiles, 2/16

2013-07-03 Thread Mark Kettenis
 Date: Wed, 3 Jul 2013 16:40:17 +0200 (CEST)
 From: Stefan Fritsch s...@sfritsch.de
 
 don't pass empty format string in subr_disk.c
 
 this is necessary to enable -Wformat or -Wno-error=format

Don't think this one makes much sense.  Better to just do:

  log(pri, "%s", "");

and keep the rest of the code as it is.  Not sure what the static
const trickery was necessary for though.

 ---
  sys/kern/subr_disk.c |   11 ++-
  1 files changed, 6 insertions(+), 5 deletions(-)
 
 diff --git sys/kern/subr_disk.c sys/kern/subr_disk.c
 index 4b500c1..2b1036a 100644
 --- sys/kern/subr_disk.c
 +++ sys/kern/subr_disk.c
 @@ -761,13 +761,14 @@ diskerr(struct buf *bp, char *dname, char *what, int 
 pri, int blkdone,
   daddr_t sn;
  
   if (pri != LOG_PRINTF) {
 - static const char fmt[] = ;
 - log(pri, fmt);
 + log(pri, %s%d%c: %s %sing fsbn , dname, unit, partname, what,
 + bp-b_flags  B_READ ? read : writ);
   pr = addlog;
 - } else
 + } else {
 + printf(%s%d%c: %s %sing fsbn , dname, unit, partname, what,
 + bp-b_flags  B_READ ? read : writ);
   pr = printf;
 - (*pr)(%s%d%c: %s %sing fsbn , dname, unit, partname, what,
 - bp-b_flags  B_READ ? read : writ);
 + }
   sn = bp-b_blkno;
   if (bp-b_bcount = DEV_BSIZE)
   (*pr)(%lld, sn);
 -- 
 1.7.6
 
 
 



Re: Removing -Wno-format from kernel makefiles, 4/16

2013-07-03 Thread Mark Kettenis
 Date: Wed, 3 Jul 2013 16:55:46 +0200 (CEST)
 From: Stefan Fritsch s...@sfritsch.de
 
 format string fixes: long
 
 ---
  sys/arch/i386/i386/esm.c |2 +-
  sys/kern/kern_descrip.c  |4 ++--
  2 files changed, 3 insertions(+), 3 deletions(-)
 
 diff --git sys/arch/i386/i386/esm.c sys/arch/i386/i386/esm.c
 index c90b2c4..3dff69e 100644
 --- sys/arch/i386/i386/esm.c
 +++ sys/arch/i386/i386/esm.c
 @@ -880,7 +880,7 @@ esm_make_sensors(struct esm_softc *sc, struct esm_devmap 
 *devmap,
   }
  
   for (j = 0; j  nsensors; j++) {
 - snprintf(s[j].desc, sizeof(s[j].desc), %s %d,
 + snprintf(s[j].desc, sizeof(s[j].desc), %s %ld,
   sensor_map[i].name, sensor_map[i].arg + j);
   }
   break;

Looking at this one, it makes more sense to make the arg member of
struct esm_sensor_map an int.  That will result in some space
savings if we'd ever bring this driver to amd64.

 diff --git sys/kern/kern_descrip.c sys/kern/kern_descrip.c
 index 50eda54..bc63a86 100644
 --- sys/kern/kern_descrip.c
 +++ sys/kern/kern_descrip.c
 @@ -1061,7 +1061,7 @@ closef(struct file *fp, struct proc *p)
  
  #ifdef DIAGNOSTIC
   if (fp-f_count  2)
 - panic(closef: count (%d)  2, fp-f_count);
 + panic(closef: count (%ld)  2, fp-f_count);
  #endif
   fp-f_count--;
  
 @@ -1097,7 +1097,7 @@ fdrop(struct file *fp, struct proc *p)
  
  #ifdef DIAGNOSTIC
   if (fp-f_count != 0)
 - panic(fdrop: count (%d) != 0, fp-f_count);
 + panic(fdrop: count (%ld) != 0, fp-f_count);
  #endif
  
   if (fp-f_ops)

ok kettenis@



Re: libc malloc poison

2013-07-04 Thread Mark Kettenis
 From: Theo de Raadt dera...@cvs.openbsd.org
 Date: Thu, 04 Jul 2013 09:04:54 -0600
 
 I suspect the best approach would be a hybrid value.  The upper half
 of the address should try to land in an unmapped zone, or into the zero
 page, or into some address space hole, or into super high memory above
 the stack which is guaranteed unmapped.

Don't forget strict alignment architectures, where it is beneficial
to have the lowest bit set to trigger alignment traps.



Re: Removing -Wno-format from kernel makefiles, 3/16

2013-07-04 Thread Mark Kettenis
 Date: Thu, 4 Jul 2013 18:41:30 +0200 (CEST)
 From: Stefan Fritsch s...@sfritsch.de
 
 On Wed, 3 Jul 2013, Mark Kettenis wrote:
   diff --git sys/arch/i386/i386/db_interface.c 
   sys/arch/i386/i386/db_interface.c
   index 85c1ff5..c75fd89 100644
   --- sys/arch/i386/i386/db_interface.c
   +++ sys/arch/i386/i386/db_interface.c
   @@ -197,11 +197,11 @@ db_sysregs_cmd(db_expr_t addr, int have_addr, 
   db_expr_t count, char *modif)
 uint16_t ldtr, tr;

 __asm__ __volatile__(sidt %0 : =m (idtr));
   - db_printf(idtr:   0x%08x/%04x\n,
   + db_printf(idtr:   0x%08x/%04llx\n,
 (unsigned int)(idtr  16), idtr  0x);

 __asm__ __volatile__(sgdt %0 : =m (gdtr));
   - db_printf(gdtr:   0x%08x/%04x\n,
   + db_printf(gdtr:   0x%08x/%04llx\n,
 (unsigned int)(gdtr  16), gdtr  0x);
  
  This is a tad bit inconsistent.  I'd either use %llx for both values
  and get rid of the cast, or use %x and use a cast in both cases.
 
 Like this?

ok kettenis@

 --- sys/arch/i386/i386/db_interface.c
 +++ sys/arch/i386/i386/db_interface.c
 @@ -197,12 +197,10 @@ db_sysregs_cmd(db_expr_t addr, int have_addr, db_expr_t 
 count, char *modif)
   uint16_t ldtr, tr;
  
   __asm__ __volatile__(sidt %0 : =m (idtr));
 - db_printf(idtr:   0x%08x/%04x\n,
 - (unsigned int)(idtr  16), idtr  0x);
 + db_printf(idtr:   0x%08llx/%04llx\n, idtr  16, idtr  0x);
  
   __asm__ __volatile__(sgdt %0 : =m (gdtr));
 - db_printf(gdtr:   0x%08x/%04x\n,
 - (unsigned int)(gdtr  16), gdtr  0x);
 + db_printf(gdtr:   0x%08llx/%04llx\n, gdtr  16, gdtr  0x);
  
   __asm__ __volatile__(sldt %0 : =g (ldtr));
   db_printf(ldtr:   0x%04x\n, ldtr);
 



Re: Removing -Wno-format from kernel makefiles, 4/16

2013-07-04 Thread Mark Kettenis
 Date: Thu, 4 Jul 2013 18:42:50 +0200 (CEST)
 From: Stefan Fritsch s...@sfritsch.de
 
 On Wed, 3 Jul 2013, Mark Kettenis wrote:
   diff --git sys/arch/i386/i386/esm.c sys/arch/i386/i386/esm.c
   index c90b2c4..3dff69e 100644
   --- sys/arch/i386/i386/esm.c
   +++ sys/arch/i386/i386/esm.c
   @@ -880,7 +880,7 @@ esm_make_sensors(struct esm_softc *sc, struct 
   esm_devmap *devmap,
 }

 for (j = 0; j  nsensors; j++) {
   - snprintf(s[j].desc, sizeof(s[j].desc), %s %d,
   + snprintf(s[j].desc, sizeof(s[j].desc), %s %ld,
 sensor_map[i].name, sensor_map[i].arg + j);
 }
 break;
  
  Looking at this one, it makes more sense to make the arg member of
  struct esm_sensor_map an int.  That will result in some space
  savings if we'd ever bring this driver to amd64.
 

go for it

 --- sys/arch/i386/i386/esm.c
 +++ sys/arch/i386/i386/esm.c
 @@ -87,7 +87,7 @@ enum sensor_type esm_typemap[] = {
  
  struct esm_sensor_map {
   enum esm_sensor_typetype;
 - longarg;
 + int arg;
   const char  *name;
  };
  
 
 
 



i810/i815 graphics support

2013-07-05 Thread Mark Kettenis
Hi folks,

Is there anybody still using a machine with the first generation Intel
integrated graphics chipsets?

The reason I'm asking is that there is no KMS support for these
chipsets.  On top of that the xf86-video-intel code only provides XAA
acceleration, and that's been ripped out from the Xserver.  So I'm not
convinced the driver works anymore.  And even if it does, the driver
might not have any added value.  You might actually be better off
using the xf86-video-vesa driver, as the shadowfb acceleration seems
to be somewhat broken in the xf86-video-intel driver, at least for the
newer chipsets.

The legacy part of the xf86-video-intel driver is the last user of
the AGP interfaces that the kernel provides to userland.  So dropping
support for these old chipsets completely would allow us to remove
those interfaces from the kernel, which has some (security) benefits.

Cheers,

Mark



Re: Fix to diskless(8) manpage: add amd64 and i386 to the list of clients that needs rpc.bootparamd(8)

2013-07-14 Thread Mark Kettenis
 Date: Sun, 14 Jul 2013 18:51:13 +0200
 From: Sebastian Benoit be...@openbsd.org
 
 Mark Kettenis(mark.kette...@xs4all.nl) on 2013.07.14 17:06:24 +0200:
   Date: Sun, 14 Jul 2013 14:09:26 +0200
   From: Henning Brauer lists-openbsdt...@bsws.de
   
   * Rafael Neves rafaelne...@gmail.com [2013-07-14 11:01]:
Amd64 and i386 diskless(8) setups need rpc.bootparamd(8)
   
   no, they don't.
  
  True diskless(4) operation (with root on nfs) needs rpc.bootparamd(8)
  on *all* architectures.  Merely booting a bsd.rd kernel only needs
  rpc.bootparamd(8) on the architectures mentioned under 11 in the
  EXAMPLES section of the diskless(4) page.
 
 yes, and having run through diskless 4 months ago and discovering that it was
 missing, i thought about a similar manpage diff.
 
 I mean, the page starts with
 
  When booting a system over the network, there are three phases of
  interaction between client and server:
 
  1.   The PROM (or stage-1 bootstrap) loads a boot program.
  2.   The boot program loads a kernel.
  3.   The kernel does NFS mounts for root and swap.
 
 and i would expect thats what i get when doing what it describes.
 
 Maybe just add this?

I don't think that's particularly helpful.  We should just add
rpc.bootparamd(8) to the "For all clients" list.

The pxebooting crowd that only wants to boot an installation kernel
should be pointed at pxeboot(8) instead of diskless(8).  Or they'll
just do a little bit of extra work ;).

Index: diskless.8
===
RCS file: /home/cvs/src/share/man/man8/diskless.8,v
retrieving revision 1.61
diff -u -p -r1.61 diskless.8
--- diskless.8  15 Apr 2013 06:29:57 -  1.61
+++ diskless.8  14 Jul 2013 17:03:47 -
@@ -364,8 +364,9 @@ For all clients:
 .Xr mountd 8 ,
 .Xr nfsd 8 ,
 .Xr portmap 8 ,
+.Xr rarpd 8 ,
 and
-.Xr rarpd 8 .
+.Xr rpc.bootparamd 8 .
 .Pp
 For alpha, amd64, hppa, hppa64, i386, sgi, mvme68k, mvme88k,
 sparc, sparc64, and vax clients:
@@ -376,9 +377,6 @@ For HP 300 and older HPPA clients:
 .Pp
 For newer alpha, amd64, hppa, hppa64, i386, and sgi clients:
 .Xr dhcpd 8
-.Pp
-For hp300, older hppa, mvme68k, mvme88k, sparc, sparc64, and vax clients:
-.Xr rpc.bootparamd 8
 .Pp
 For older alpha and vax clients:
 .Xr mopd 8



Re: awk(1) update

2013-07-14 Thread Mark Kettenis
 From: j...@wxcvbn.org (=?utf-8?Q?J=C3=A9r=C3=A9mie_Courr=C3=A8ges-Anglas?=)
 Date: Sun, 14 Jul 2013 09:41:28 +0200
 
 
 This diff updates awk to the 20121220 upstream version, with a few
 fixups.
 
 ok?
 
 Index: tran.c
 ===
 RCS file: /cvs/src/usr.bin/awk/tran.c,v
 retrieving revision 1.15
 diff -u -p -r1.15 tran.c
 --- tran.c28 Sep 2011 19:27:18 -  1.15
 +++ tran.c22 Jun 2013 21:47:01 -
 @@ -299,6 +299,8 @@ Awkfloat setfval(Cell *vp, Awkfloat f)/
   xfree(vp-sval); /* free any previous string */
   vp-tval = ~STR;   /* mark string invalid */
   vp-tval |= NUM;/* mark number ok */
 + if (f == -0)  /* who would have thought this possible? */
 + f = 0;

This is a bit silly.  Even though floating point values may be able to
represent signed zeroes (the IEEE standard formats do) they are
guaranteed to compare equal to the zero with the opposite sign.

This diff forces all zeroes to be positive zeroes.  Not sure that is
desirable.  GNU awk seems to treat them differently.  See 

  
http://www.gnu.org/software/gawk/manual/html_node/Unexpected-Results.html#Unexpected-Results

Cheers,

Mark



Re: Consistent Kernel Panic-Hardware-Related?

2013-07-24 Thread Mark Kettenis
 Date: Mon, 8 Jul 2013 11:06:51 +0200
 From: Christian Ehrhardt ehrha...@genua.de
 
 Hi,

Hi Christian,

Taking this to tech@ in the hope some more people will look into this.

 On Thu, Jul 04, 2013 at 09:56:56AM -0700, Scott Vanderbilt wrote:
  I've been trying to build userland repeatedly over the past few days
  on a particular machine and consistently get kernel panics, though
  never at exactly the same point in the process. The latest occurred
  midway through 'make obj'. Attempts to build userland on another
  i386 machine from code pulled via cvs at more or less the same time
  works fine, so it seems the issue is isolated to this hardware.
 
  I initially suspected my SSD had gone bad, so I replaced it with a
  brand new drive. However, the issue persists, so I no longer suspect
  the drive.
 
  A ps, trace, and dmesg are provided below. This is my first time
  reporting a bug of this nature. I hope I've followed procedure. If
  not, please do let me know. I'm trying to be useful. :-)
 
  -
 
  panic: pmap_remove_ptes: managed page without PG_PVLIST for 0x3c001000
  Stopped at  Debugger+0x4:   popl%ebp
  RUN AT LEAST 'trace' AND 'ps' AND INCLUDE OUTPUT WHEN REPORTING THIS PANIC!
  DO NOT EVEN BOTHER REPORTING THIS WITHOUT INCLUDING THAT INFORMATION!
 
  ddb show panic
  pmap_remove_ptes: managed page without PG_PVLIST for 0x3c001000
 
  ddb trace
  Debugger(d0963718,f6269e38,d0966be4,f6269e38,d1cf1040) at Debugger+0x4
  panic(d0966be4,3c001000,d1ceb16c,f6269e4c,0) at panic+0x5d
  pmap_remove_ptes(d9e39798,d1cf1040,ffcf,3c00,3c003000) at
  pmap_remove_p
  tes+0x142
  pmap_do_remove(d9e39798,3c00,3c003000,0,d0ad7820) at
 pmap_do_remove+0xeb
  pmap_remove(d9e39798,3c00,3c003000,d056c4e9,d9c68e1c) at
  pmap_remove+0x27
  uvm_unmap_kill_entry(d9e3ad80,d9c68e1c,f6269f2c,d043a597,0) at
  uvm_unmap_kill_e
  ntry+0xf8
  uvm_map_teardown(d9e3ad80,1,4,d093e66e,d9cc2700) at uvm_map_teardown+0xac
  uvmspace_free(d9e3ad80,1,1,f6269f6c,d0203009) at uvmspace_free+0x2e
  uvm_exit(d9cc3ba4,d0a4e0a8,4,d093e66e,0) at uvm_exit+0x15
  reaper(d9e33004) at reaper+0x8a
  Bad frame pointer: 0xd0c3ce68
 
 Can you try to see if the following patch helps? It did for me, when
 I was debugging a similar panic back in December. However, my
 explanation why the patch would fix this bug, turned out to be invalid.
 Still the bug went away. If the same happens for you, some more people
 should have a look at the patch:
 
 --- /mount/blink/aegis/project/gg/history/os/src/sys/arch/i386/i386/pmap.c
 2012/10/16 18:31:28   1.117
 +++ /mount/blink/aegis/project/gg/history/os/src/sys/arch/i386/i386/pmap.c
 2013/01/24 17:20:06   1.118
 @@ -495,7 +495,7 @@ pmap_map_ptes(struct pmap *pmap)
 
   /* need to load a new alternate pt space into curpmap? */
   opde = *APDP_PDE;
 -#if defined(MULTIPROCESSOR)  defined(DIAGNOSTIC)
 +#if defined(DIAGNOSTIC)
   if (pmap_valid_entry(opde))
   panic(pmap_map_ptes: APTE valid);
  #endif
 @@ -521,10 +521,8 @@ pmap_unmap_ptes(struct pmap *pmap)
   if (pmap_is_curpmap(pmap)) {
   simple_unlock(pmap-pm_obj.vmobjlock);
   } else {
 -#if defined(MULTIPROCESSOR)
   *APDP_PDE = 0;
   pmap_apte_flush();
 -#endif
   simple_unlock(pmap-pm_obj.vmobjlock);
   simple_unlock(curpcb-pcb_pmap-pm_obj.vmobjlock);
   }

Wish somebody with more in-depth knowledge about the i386 pmap
implementation would respond :(.

Your diff basically disables an optimization where the alternate pmap
is kept around in case we need it again.  Not sure how important this
optimization is.  I guess the primary user of the alternate pmap is
the reaper, and keeping the alternate pmap around there could be
beneficial if the address space of the process we're reaping is
heavily fragmented.

There is something fishy with this optimization.  *APDP_PDE is never
cleared, which means that it becomes stale after the process exits.
Presumably we'd notice the next time we try to map an alternate pmap,
but if the physical pages for the pmap get recycled, we might not.
Not quite seeing how this leads to that panic, but perhaps we should
clear *APDP_PDE in pmap_switch()?



Re: working ral(4) driver with Ralink 35xx chipset. (Now, what's the protocol for testing?)

2013-07-24 Thread Mark Kettenis
 Date: Wed, 24 Jul 2013 02:50:13 -0600
 From: Nathan Goings binarysp...@binaryspike.com
 
 Well, I'm a moron.  I spent several weeks working on the Ralink driver, 
 profiling linux vendor provided code; looking at changes between 2860, 
 35xx, and 3090; and nagging various people.  I made a single change and 
 the ral(4) driver is working on the Ralink 35xx series in both client 
 and hostap mode (currently, 11b mode).

Welcome to the world of hardware hacking.

 What is the protocol for testing?  Here's my agenda for testing:
   1. Try different (faster) wireless modes.
   2. Stress-test bandwidth with file transfers
   3. Long term monitor for packet-loss, reconnects, and ping spikes.
   4. Throw away that Fedora ISO. (supports 35xx out-of-the-box)
   5. Bathe in the glory of OpenBSD.

The protocol for testing is simple: make sure it doesn't break any of
the devices that are already working.

 The change I made was simple:
 /usr/src/sys/dev/ic/rt2860.c : Line 202
 
 Change RT3070_DEF_RF to RT3572_DEF_RF
 
 That causes the rt3090_def_rf[] array to fill with defaults for the 
 35xx.  I'm surprised this change works.

Well, that will certainly break existing support for the RT3090 chips.
So a bit more work will be needed.  First step would be to provide a
proper diff, and show us your dmesg!



Re: working ral(4) driver with Ralink 35xx chipset.

2013-07-25 Thread Mark Kettenis
Looked at your diff, and then noticed that run(4) already had support
for the RT3572 MAC/BBP.  It has a few more RT3572-specific bits that
match what the Linux driver does.  So I ported them over to ral(4).
Could you give the attached diff a go?


Index: rt2860.c
===
RCS file: /cvs/src/sys/dev/ic/rt2860.c,v
retrieving revision 1.68
diff -u -p -r1.68 rt2860.c
--- rt2860.c11 Jun 2013 18:15:53 -  1.68
+++ rt2860.c25 Jul 2013 08:24:44 -
@@ -130,6 +130,7 @@ voidrt2860_set_basicrates(struct rt286
 void   rt2860_select_chan_group(struct rt2860_softc *, int);
 void   rt2860_set_chan(struct rt2860_softc *, u_int);
 void   rt3090_set_chan(struct rt2860_softc *, u_int);
+void   rt3572_set_chan(struct rt2860_softc *, u_int);
 intrt3090_rf_init(struct rt2860_softc *);
 void   rt3090_rf_wakeup(struct rt2860_softc *);
 intrt3090_filter_calib(struct rt2860_softc *, uint8_t, uint8_t,
@@ -197,6 +198,8 @@ static const struct {
uint8_t val;
 }  rt3090_def_rf[] = {
RT3070_DEF_RF
+}, rt3572_def_rf[] = {
+   RT3572_DEF_RF
 };
 
 int
@@ -2158,13 +2161,15 @@ rt2860_select_chan_group(struct rt2860_s
rt2860_mcu_bbp_write(sc, 75, 0x50);
}
} else {
-   if (sc-ext_5ghz_lna) {
+   if (sc-mac_ver == 0x3572)
+   rt2860_mcu_bbp_write(sc, 82, 0x94);
+   else
rt2860_mcu_bbp_write(sc, 82, 0xf2);
+
+   if (sc-ext_5ghz_lna)
rt2860_mcu_bbp_write(sc, 75, 0x46);
-   } else {
-   rt2860_mcu_bbp_write(sc, 82, 0xf2);
+   else
rt2860_mcu_bbp_write(sc, 75, 0x50);
-   }
}
 
tmp = RAL_READ(sc, RT2860_TX_BAND_CFG);
@@ -2191,7 +2196,12 @@ rt2860_select_chan_group(struct rt2860_s
if (sc-mac_ver == 0x3593  sc-ntxchains  2)
tmp |= RT3593_PA_PE_A2_EN;
}
-   RAL_WRITE(sc, RT2860_TX_PIN_CFG, tmp);
+   if (sc-mac_ver == 0x3572) {
+   rt3090_rf_write(sc, 8, 0x00);
+   RAL_WRITE(sc, RT2860_TX_PIN_CFG, tmp);
+   rt3090_rf_write(sc, 8, 0x80);
+   } else
+   RAL_WRITE(sc, RT2860_TX_PIN_CFG, tmp);
 
if (sc-mac_ver == 0x3593) {
tmp = RAL_READ(sc, RT2860_GPIO_CTRL);
@@ -2215,7 +2225,10 @@ rt2860_select_chan_group(struct rt2860_s
else
agc = 0x2e + sc-lna[0];
} else {/* 5GHz band */
-   agc = 0x32 + (sc-lna[group] * 5) / 3;
+   if (sc-mac_ver == 0x3572)
+   agc = 0x22 + (sc-lna[group] * 5) / 3;
+   else
+   agc = 0x32 + (sc-lna[group] * 5) / 3;
}
rt2860_mcu_bbp_write(sc, 66, agc);
 
@@ -2341,6 +2354,154 @@ rt3090_set_chan(struct rt2860_softc *sc,
rt3090_rf_write(sc, 7, rf | RT3070_TUNE);
 }
 
+void
+rt3572_set_chan(struct rt2860_softc *sc, u_int chan)
+{
+   int8_t txpow1, txpow2;
+   uint32_t tmp;
+   uint8_t rf;
+   int i;
+
+   /* find the settings for this channel (we know it exists) */
+   for (i = 0; rt2860_rf2850[i].chan != chan; i++);
+
+   /* use Tx power values from EEPROM */
+   txpow1 = sc-txpow1[i];
+   txpow2 = sc-txpow2[i];
+
+   if (chan = 14) {
+   rt2860_mcu_bbp_write(sc, 25, sc-bbp25);
+   rt2860_mcu_bbp_write(sc, 26, sc-bbp26);
+   } else {
+   /* enable IQ phase correction */
+   rt2860_mcu_bbp_write(sc, 25, 0x09);
+   rt2860_mcu_bbp_write(sc, 26, 0xff);
+   }
+
+   rt3090_rf_write(sc, 2, rt3090_freqs[i].n);
+   rt3090_rf_write(sc, 3, rt3090_freqs[i].k);
+   rf = rt3090_rf_read(sc, 6);
+   rf  = (rf  ~0x0f) | rt3090_freqs[i].r;
+   rf |= (chan = 14) ? 0x08 : 0x04;
+   rt3090_rf_write(sc, 6, rf);
+
+   /* set PLL mode */
+   rf = rt3090_rf_read(sc, 5);
+   rf = ~(0x08 | 0x04);
+   rf |= (chan = 14) ? 0x04 : 0x08;
+   rt3090_rf_write(sc, 5, rf);
+
+   /* set Tx power for chain 0 */
+   if (chan = 14)
+   rf = 0x60 | txpow1;
+   else
+   rf = 0xe0 | (txpow1  0xc)  1 | (txpow1  0x3);
+   rt3090_rf_write(sc, 12, rf);
+
+   /* set Tx power for chain 1 */
+   if (chan = 14)
+   rf = 0x60 | txpow2;
+   else
+   rf = 0xe0 | (txpow2  0xc)  1 | (txpow2  0x3);
+   rt3090_rf_write(sc, 13, rf);
+
+   /* set Tx/Rx streams */
+   rf = rt3090_rf_read(sc, 1);
+   rf = ~0xfc;
+   if (sc-ntxchains == 1)
+   rf |= 1  7 | 1  5;  /* 1T: disable Tx chains 2  3 */
+   else if (sc-ntxchains == 2)
+   rf |= 1  7;   /* 2T: disable Tx chain 3 */
+   if (sc-nrxchains == 1)

Re: working ral(4) driver with Ralink 35xx chipset.

2013-07-25 Thread Mark Kettenis
 Date: Thu, 25 Jul 2013 11:10:21 +0200 (CEST)
 From: Mark Kettenis mark.kette...@xs4all.nl
 
  Date: Thu, 25 Jul 2013 10:33:12 +0200 (CEST)
  From: Mark Kettenis mark.kette...@xs4all.nl
  
  Looked at your diff, and then noticed that run(4) already had support
  for the RT3572 MAC/BBP.  It has a few more RT3572-specific bits that
  match what the Linux driver does.  So I ported them over to ral(4).
  Could you give the attached diff a go?
 
 Actually, this diff isn't quite right.  I'll send a new one shortly.

New diff.  Please give this one a shot.

Index: rt2860.c
===
RCS file: /cvs/src/sys/dev/ic/rt2860.c,v
retrieving revision 1.68
diff -u -p -r1.68 rt2860.c
--- rt2860.c11 Jun 2013 18:15:53 -  1.68
+++ rt2860.c25 Jul 2013 09:30:13 -
@@ -197,6 +197,8 @@ static const struct {
uint8_t val;
 }  rt3090_def_rf[] = {
RT3070_DEF_RF
+}, rt3572_def_rf[] = {
+   RT3572_DEF_RF
 };
 
 int
@@ -2158,13 +2160,15 @@ rt2860_select_chan_group(struct rt2860_s
rt2860_mcu_bbp_write(sc, 75, 0x50);
}
} else {
-   if (sc-ext_5ghz_lna) {
+   if (sc-mac_ver == 0x3572)
+   rt2860_mcu_bbp_write(sc, 82, 0x94);
+   else
rt2860_mcu_bbp_write(sc, 82, 0xf2);
+
+   if (sc-ext_5ghz_lna)
rt2860_mcu_bbp_write(sc, 75, 0x46);
-   } else {
-   rt2860_mcu_bbp_write(sc, 82, 0xf2);
+   else
rt2860_mcu_bbp_write(sc, 75, 0x50);
-   }
}
 
tmp = RAL_READ(sc, RT2860_TX_BAND_CFG);
@@ -2191,7 +2195,12 @@ rt2860_select_chan_group(struct rt2860_s
if (sc-mac_ver == 0x3593  sc-ntxchains  2)
tmp |= RT3593_PA_PE_A2_EN;
}
-   RAL_WRITE(sc, RT2860_TX_PIN_CFG, tmp);
+   if (sc-mac_ver == 0x3572) {
+   rt3090_rf_write(sc, 8, 0x00);
+   RAL_WRITE(sc, RT2860_TX_PIN_CFG, tmp);
+   rt3090_rf_write(sc, 8, 0x80);
+   } else
+   RAL_WRITE(sc, RT2860_TX_PIN_CFG, tmp);
 
if (sc-mac_ver == 0x3593) {
tmp = RAL_READ(sc, RT2860_GPIO_CTRL);
@@ -2215,7 +2224,10 @@ rt2860_select_chan_group(struct rt2860_s
else
agc = 0x2e + sc-lna[0];
} else {/* 5GHz band */
-   agc = 0x32 + (sc-lna[group] * 5) / 3;
+   if (sc-mac_ver == 0x3572)
+   agc = 0x22 + (sc-lna[group] * 5) / 3;
+   else
+   agc = 0x32 + (sc-lna[group] * 5) / 3;
}
rt2860_mcu_bbp_write(sc, 66, agc);
 
@@ -2367,9 +2379,16 @@ rt3090_rf_init(struct rt2860_softc *sc)
RAL_WRITE(sc, RT3070_GPIO_SWITCH, tmp  ~0x20);
 
/* initialize RF registers to default value */
-   for (i = 0; i  nitems(rt3090_def_rf); i++) {
-   rt3090_rf_write(sc, rt3090_def_rf[i].reg,
-   rt3090_def_rf[i].val);
+   if (sc-mac_ver == 0x3572) {
+   for (i = 0; i  nitems(rt3572_def_rf); i++) {
+   rt3090_rf_write(sc, rt3572_def_rf[i].reg,
+   rt3572_def_rf[i].val);
+   }
+   } else {
+   for (i = 0; i  nitems(rt3090_def_rf); i++) {
+   rt3090_rf_write(sc, rt3090_def_rf[i].reg,
+   rt3090_def_rf[i].val);
+   }
}
 
/* select 20MHz bandwidth */



mfi(4) polled command fix

2013-07-29 Thread Mark Kettenis
The diff below fixes the management command code path such that it
works again when polled command completion is needed.  This is
important for flushing the caches when we end up in ddb and want to do
a kernel crash dump or reboot.

I don't have this hardware myself, so some tests would be appreciated.

ok?


Index: mfi.c
===
RCS file: /cvs/src/sys/dev/ic/mfi.c,v
retrieving revision 1.146
diff -u -p -r1.146 mfi.c
--- mfi.c   18 May 2013 08:39:47 -  1.146
+++ mfi.c   29 Jul 2013 09:34:07 -
@@ -1330,9 +1330,11 @@ mfi_do_mgmt(struct mfi_softc *sc, struct
 
DNPRINTF(MFI_D_MISC, %s: mfi_do_mgmt %#x\n, DEVNAME(sc), opc);
 
-   dma_buf = dma_alloc(len, PR_WAITOK);
-   if (dma_buf == NULL)
-   goto done;
+   if (len  0) {
+   dma_buf = dma_alloc(len, cold ? PR_NOWAIT : PR_WAITOK);
+   if (dma_buf == NULL)
+   goto done;
+   }
 
dcmd = ccb-ccb_frame-mfr_dcmd;
memset(dcmd-mdf_mbox, 0, MFI_MBOX_SIZE);



  1   2   3   4   5   6   7   8   9   10   >