radeondrm(4) unaligned access fix

2020-10-25 Thread Mark Kettenis
While making radeondrm(4) work on powerpc64 I'm running into an
interesting unaligned access issue.

Modern POWER CPUs generally support unaligned access.  Normal load and
store instructions work fine with addresses that aren't naturally
aligned when operating on cached memory.  As a result, clang will
optimize code by replacing two 32-bit store instructions with a single
64-bit store instruction even if there is only 32-bit alignment.

However, this doesn't work for memory that is mapped uncachable.  And
there is some code in radeondrm(4) (and also in amdgpu(4)) that
generates alignment exceptions because it is writing to bits of video
memory that are mapped through the graphics aperture.

There are two ways to fix this.  The compiler won't apply this
optimization if memory is accessed through pointers that are marked
volatile.  Hence the fix below.  In my opinion that is the right fix
as rdev->uvd.cpu_addr is a volatile pointer and that aspect shouldn't
be dropped.  The downside of this approach is that we may need to
maintain some additional local fixes.

The alternative is to emulate the access in the kernel.  I fear that
is what Linux does, which is why they don't notice this issue.  As
such, this issue may crop up in more places and the emulation would
catch them all.  But I'm a bit reluctant to add this emulation since
it may hide bugs in other parts of our kernel.

Thoughts?  ok?


Index: dev/pci/drm/radeon/radeon_uvd.c
===
RCS file: /cvs/src/sys/dev/pci/drm/radeon/radeon_uvd.c,v
retrieving revision 1.3
diff -u -p -r1.3 radeon_uvd.c
--- dev/pci/drm/radeon/radeon_uvd.c 8 Jun 2020 04:48:16 -   1.3
+++ dev/pci/drm/radeon/radeon_uvd.c 25 Oct 2020 09:11:33 -
@@ -781,7 +781,7 @@ int radeon_uvd_get_create_msg(struct rad
uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
RADEON_GPU_PAGE_SIZE;
 
-   uint32_t *msg = rdev->uvd.cpu_addr + offs;
+   volatile uint32_t *msg = rdev->uvd.cpu_addr + offs;
uint64_t addr = rdev->uvd.gpu_addr + offs;
 
int r, i;
@@ -817,7 +817,7 @@ int radeon_uvd_get_destroy_msg(struct ra
uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
RADEON_GPU_PAGE_SIZE;
 
-   uint32_t *msg = rdev->uvd.cpu_addr + offs;
+   volatile uint32_t *msg = rdev->uvd.cpu_addr + offs;
uint64_t addr = rdev->uvd.gpu_addr + offs;
 
int r, i;



Re: UVM fault check

2020-10-20 Thread Mark Kettenis
> Date: Mon, 19 Oct 2020 10:18:51 +0200
> From: Martin Pieuchot 
> 
> uvm_fault() is one of the most contended "entry points" of the kernel.
> To reduce this contention I'm carefully refactoring this code to be able
> to push the KERNEL_LOCK() inside the fault handler.
> 
> The first aim of this project would be to get the upper layer faults
> (cases 1A and 1B) out of ze big lock.  As these faults do not involve
> `uobj' the scope of this project should be limited to serializing amap
> changes without the KERNEL_LOCK().
> 
> The diff below moves the first part of uvm_fault() into its own
> function: uvm_fault_check().  It is inspired by/imitates the current
> code structure of NetBSD's fault handler. 
> 
> This diff should not have any functional change.
> 
> I hope it helps build better understanding of this area.
> 
> Comments?  Oks?

ok kettenis@

One nit below that you may want to fix before committing this.

> 
> Index: uvm/uvm_fault.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 uvm_fault.c
> --- uvm/uvm_fault.c   29 Sep 2020 11:47:41 -  1.102
> +++ uvm/uvm_fault.c   12 Oct 2020 09:01:05 -
> @@ -472,114 +472,101 @@ uvmfault_update_stats(struct uvm_faultin
>   }
>  }
>  
> -/*
> - *   F A U L T   -   m a i n   e n t r y   p o i n t
> - */
> +struct uvm_faultctx {
> + /*
> +  * the following members are set up by uvm_fault_check() and
> +  * read-only after that.
> +  */
> + vm_prot_t enter_prot;
> + vaddr_t startva;
> + int npages;
> + int centeridx;
> + boolean_t narrow;
> + boolean_t wired;
> + paddr_t pa_flags;
> +};
>  
>  /*
> - * uvm_fault: page fault handler
> + * uvm_fault_check: check prot, handle needs-copy, etc.
>   *
> - * => called from MD code to resolve a page fault
> - * => VM data structures usually should be unlocked.   however, it is
> - *   possible to call here with the main map locked if the caller
> - *   gets a write lock, sets it recursive, and then calls us (c.f.
> - *   uvm_map_pageable).   this should be avoided because it keeps
> - *   the map locked off during I/O.
> + *   1. lookup entry.
> + *   2. check protection.
> + *   3. adjust fault condition (mainly for simulated fault).
> + *   4. handle needs-copy (lazy amap copy).
> + *   5. establish range of interest for neighbor fault (aka pre-fault).
> + *   6. look up anons (if amap exists).
> + *   7. flush pages (if MADV_SEQUENTIAL)
> + *
> + * => called with nothing locked.
> + * => if we fail (result != 0) we unlock everything.
> + * => initialize/adjust many members of flt.
>   */
> -#define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \
> -  ~PROT_WRITE : PROT_MASK)
>  int
> -uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type,
> -vm_prot_t access_type)
> +uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
> +struct vm_anon ***ranons, vm_prot_t access_type)
>  {
> - struct uvm_faultinfo ufi;
> - vm_prot_t enter_prot;
> - boolean_t wired, narrow, promote, locked, shadowed;
> - int npages, nback, nforw, centeridx, result, lcv, gotpages, ret;
> - vaddr_t startva, currva;
> - voff_t uoff;
> - paddr_t pa, pa_flags;
>   struct vm_amap *amap;
>   struct uvm_object *uobj;
> - struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon;
> - struct vm_page *pages[UVM_MAXRANGE], *pg, *uobjpage;
> + int nback, nforw;
>  
> - anon = NULL;
> - pg = NULL;
> -
> - uvmexp.faults++;/* XXX: locking? */
> - TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL);
> -
> - /* init the IN parameters in the ufi */
> - ufi.orig_map = orig_map;
> - ufi.orig_rvaddr = trunc_page(vaddr);
> - ufi.orig_size = PAGE_SIZE;  /* can't get any smaller than this */
> - if (fault_type == VM_FAULT_WIRE)
> - narrow = TRUE;  /* don't look for neighborhood
> -  * pages on wire */
> - else
> - narrow = FALSE; /* normal fault */
> -
> - /* "goto ReFault" means restart the page fault from ground zero. */
> -ReFault:
>   /* lookup and lock the maps */
> - if (uvmfault_lookup(&ufi, FALSE) == FALSE) {
> + if (uvmfault_lookup(ufi, FALSE) == FALSE) {
>   return (EFAULT);
>   }
>  
>  #ifdef DIAGNOSTIC
> - if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0)
> + if ((ufi->map->flags & VM_MAP_PAGEABLE) == 0)
>   panic("uvm_fault: fault on non-pageable map (%p, 0x%lx)",
> - ufi.map, vaddr);
> + ufi->map, ufi->orig_rvaddr);
>  #endif
>  
>   /* check protection */
> - if ((ufi.entry->protection & access_type) != access_type) {
> - uvmfault_unlockmaps(&ufi, FALSE);
> + if ((ufi->entry->protection & access_type) != access_type) {
> + 

Re: const/C99 & locks for uvm_pagerops

2020-10-20 Thread Mark Kettenis
> Date: Tue, 20 Oct 2020 11:29:03 +0200
> From: Martin Pieuchot 
> 
> Diff below uses C99 initializers and constifies the various "struct uvm_pagerops"
> in tree.
> 
> While here add some KERNEL_ASSERT_LOCKED() to places where the `uobj'
> locking has been removed and that should be revisited.  This is to help
> a future myself or another developer to look at what needs some love.
> 
> ok?

ok kettenis@

> Index: dev/pci/drm/drm_gem.c
> ===
> RCS file: /cvs/src/sys/dev/pci/drm/drm_gem.c,v
> retrieving revision 1.11
> diff -u -p -r1.11 drm_gem.c
> --- dev/pci/drm/drm_gem.c 22 Aug 2020 04:53:50 -  1.11
> +++ dev/pci/drm/drm_gem.c 20 Oct 2020 09:01:08 -
> @@ -58,12 +58,11 @@ boolean_t drm_flush(struct uvm_object *,
>  int drm_fault(struct uvm_faultinfo *, vaddr_t, vm_page_t *, int, int,
>  vm_fault_t, vm_prot_t, int);
>  
> -struct uvm_pagerops drm_pgops = {
> - NULL,
> - drm_ref,
> - drm_unref,
> - drm_fault,
> - drm_flush,
> +const struct uvm_pagerops drm_pgops = {
> + .pgo_reference = drm_ref,
> + .pgo_detach = drm_unref,
> + .pgo_fault = drm_fault,
> + .pgo_flush = drm_flush,
>  };
>  
>  void
> Index: dev/pci/drm/ttm/ttm_bo_vm.c
> ===
> RCS file: /cvs/src/sys/dev/pci/drm/ttm/ttm_bo_vm.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 ttm_bo_vm.c
> --- dev/pci/drm/ttm/ttm_bo_vm.c   18 Oct 2020 09:22:32 -  1.22
> +++ dev/pci/drm/ttm/ttm_bo_vm.c   20 Oct 2020 09:01:08 -
> @@ -903,7 +903,7 @@ ttm_bo_vm_detach(struct uvm_object *uobj
>   ttm_bo_put(bo);
>  }
>  
> -struct uvm_pagerops ttm_bo_vm_ops = {
> +const struct uvm_pagerops ttm_bo_vm_ops = {
>   .pgo_fault = ttm_bo_vm_fault,
>   .pgo_reference = ttm_bo_vm_reference,
>   .pgo_detach = ttm_bo_vm_detach
> Index: uvm/uvm_aobj.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.87
> diff -u -p -r1.87 uvm_aobj.c
> --- uvm/uvm_aobj.c22 Sep 2020 14:31:08 -  1.87
> +++ uvm/uvm_aobj.c20 Oct 2020 09:01:08 -
> @@ -181,16 +181,14 @@ int uao_grow_convert(struct uvm_object *
>  
>  /*
>   * aobj_pager
> - * 
> + *
>   * note that some functions (e.g. put) are handled elsewhere
>   */
> -struct uvm_pagerops aobj_pager = {
> - NULL,   /* init */
> - uao_reference,  /* reference */
> - uao_detach, /* detach */
> - NULL,   /* fault */
> - uao_flush,  /* flush */
> - uao_get,/* get */
> +const struct uvm_pagerops aobj_pager = {
> + .pgo_reference = uao_reference,
> + .pgo_detach = uao_detach,
> + .pgo_flush = uao_flush,
> + .pgo_get = uao_get,
>  };
>  
>  /*
> @@ -810,6 +808,7 @@ uao_init(void)
>  void
>  uao_reference(struct uvm_object *uobj)
>  {
> + KERNEL_ASSERT_LOCKED();
>   uao_reference_locked(uobj);
>  }
>  
> @@ -834,6 +833,7 @@ uao_reference_locked(struct uvm_object *
>  void
>  uao_detach(struct uvm_object *uobj)
>  {
> + KERNEL_ASSERT_LOCKED();
>   uao_detach_locked(uobj);
>  }
>  
> @@ -908,6 +908,8 @@ uao_flush(struct uvm_object *uobj, voff_
>   struct vm_page *pp;
>   voff_t curoff;
>  
> + KERNEL_ASSERT_LOCKED();
> +
>   if (flags & PGO_ALLPAGES) {
>   start = 0;
>   stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
> @@ -1028,6 +1030,8 @@ uao_get(struct uvm_object *uobj, voff_t 
>   vm_page_t ptmp;
>   int lcv, gotpages, maxpages, swslot, rv, pageidx;
>   boolean_t done;
> +
> + KERNEL_ASSERT_LOCKED();
>  
>   /* get number of pages */
>   maxpages = *npagesp;
> Index: uvm/uvm_aobj.h
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.h,v
> retrieving revision 1.16
> diff -u -p -r1.16 uvm_aobj.h
> --- uvm/uvm_aobj.h11 Jul 2014 16:35:40 -  1.16
> +++ uvm/uvm_aobj.h20 Oct 2020 09:01:08 -
> @@ -69,7 +69,7 @@ int uao_grow(struct uvm_object *, int);
>   * globals
>   */
>  
> -extern struct uvm_pagerops aobj_pager;
> +extern const struct uvm_pagerops aobj_pager;
>  
>  #endif /* _KERNEL */
>  
> Index: uvm/uvm_device.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_device.c,v
> retrieving revision 1.57
> diff -u -p -r1.57 uvm_device.c
> --- uvm/uvm_device.c  8 Dec 2019 12:37:45 -   1.57
> +++ uvm/uvm_device.c  20 Oct 2020 09:01:08 -
> @@ -70,12 +70,11 @@ static boolean_tudv_flush(struct
>  /*
>   * master pager structure
>   */
> -struct uvm_pagerops uvm_deviceops = {
> - NULL,   /* inited statically */
> - udv_reference,
> - udv_detach,
> - udv_fault,
> - udv_flush,
> +const struct uvm_pagerops uvm_deviceops = {
> + .pgo_reference = udv_reference,
> + 

Re: arm64 ddb: decode "udf" instruction

2020-10-19 Thread Mark Kettenis
> Date: Mon, 19 Oct 2020 20:03:01 +0200
> From: Christian Weisgerber 
> 
> This decodes the UDF ("permanently undefined") instruction in ddb's
> arm64 disassembler.  The particular immediate16 format appears to
> be unique to this instruction.
> 
> OK?  Or don't bother?

ok kettenis@

> Index: arch/arm64/arm64/disasm.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/disasm.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 disasm.c
> --- arch/arm64/arm64/disasm.c 11 Sep 2020 09:27:10 -  1.2
> +++ arch/arm64/arm64/disasm.c 19 Oct 2020 16:17:55 -
> @@ -3107,6 +3107,11 @@ OP4FUNC(op_tbz, b5, b40, imm14, Rt)
>   PRINTF("\n");
>  }
>  
> +OP1FUNC(op_udf, imm16)
> +{
> + PRINTF("udf\t#0x%"PRIx64"\n", imm16);
> +}
> +
>  OP4FUNC(op_udiv, sf, Rm, Rn, Rd)
>  {
>   PRINTF("udiv\t%s, %s, %s\n",
> @@ -3668,6 +3673,8 @@ struct insn_info {
>   {{ 5,16}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}}
>  #define FMT_IMM16_LL \
>   {{ 5,16}, { 0, 2}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}}
> +#define FMT_IMM16_UDF\
> + {{ 0,16}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}}
>  #define FMT_OP0_OP1_CRN_CRM_OP2_RT   \
>   {{19, 2}, {16, 3}, {12, 4}, { 8, 4}, { 5, 3}, { 0, 5}, { 0, 0}, { 0, 0}}
>  #define FMT_IMM7_RT2_RN_RT   \
> @@ -3786,6 +3793,7 @@ static const struct insn_info insn_table
>   { 0xd800, 0xdac1, FMT_Z_M_RN_RD,   op_pacia },
>   { 0xcc00, 0x4e284800, FMT_M_D_RN_RD,   op_simd_aes },
>   { 0x8c00, 0x5e280800, FMT_OP3_RN_RD,   op_simd_sha_reg2 },
> + { 0x, 0x, FMT_IMM16_UDF,   op_udf },
>   { 0xfff8f01f, 0xd500401f, FMT_OP1_CRM_OP2, op_msr_imm },
>   { 0xfff8, 0xd508, FMT_OP1_CRN_CRM_OP2_RT,  op_sys },
>   { 0xfff8, 0xd528, FMT_OP1_CRN_CRM_OP2_RT,  op_sysl },
> -- 
> Christian "naddy" Weisgerber  na...@mips.inka.de
> 
> 



Re: arm64, armv7: proper illegal instruction

2020-10-19 Thread Mark Kettenis
> From: "Theo de Raadt" 
> Date: Mon, 19 Oct 2020 09:25:30 -0600
> 
> Mark Kettenis  wrote:
> 
> > > Date: Mon, 19 Oct 2020 16:36:14 +0200
> > > From: Christian Weisgerber 
> > > 
> > > Belatedly, ARM has taken a slice of the reserved opcode space and
> > > assigned it as a properly defined illegal instruction, udf #imm16.
> > > (Armv8 Architecture Reference Manual, edition F.c, section C6.2.335).
> > > Clang already knows about it.
> > > 
> > > We really should use this instead of picking something ad-hoc out
> > > of the opcode space.
> > > 
> > > I have verified that this builds on arm64, produces a SIGILL in
> > > userland, and drops me into ddb in the kernel.
> > > 
> > > armv7 has an equivalent instruction.  kettenis@ confirms it builds
> > > and SIGILLs there.
> > > 
> > > OK?
> > 
> > So on armv7 there is an additional consideration.  The architecture
> > > defines two instruction sets: A32 and T32 (Thumb).  A32 instructions
> > are 32-bit but T32 instructions can be 16-bit.  If an attacker can
> > switch the CPU into T32 mode, it will interpret this UDF instruction
> > as two different instructions.  We may have to consider how "bad"
> > these two instructions are and maybe tune that #imm16 accordingly.
> 
> If the attacker has turned on thumb in the kernel I think all causes
> are lost ... aren't there thousands of thumb gadgets?

Probably.

So 0xa000f7f0 happens to be the 32-bit Thumb UDF instruction.  If you
jump into the middle of that you get an ADR instruction.  So I don't
think that one was particularly "safe".  So I think naddy's patch is
fine.




Re: arm64, armv7: proper illegal instruction

2020-10-19 Thread Mark Kettenis
> Date: Mon, 19 Oct 2020 16:36:14 +0200
> From: Christian Weisgerber 
> 
> Belatedly, ARM has taken a slice of the reserved opcode space and
> assigned it as a properly defined illegal instruction, udf #imm16.
> (Armv8 Architecture Reference Manual, edition F.c, section C6.2.335).
> Clang already knows about it.
> 
> We really should use this instead of picking something ad-hoc out
> of the opcode space.
> 
> I have verified that this builds on arm64, produces a SIGILL in
> userland, and drops me into ddb in the kernel.
> 
> armv7 has an equivalent instruction.  kettenis@ confirms it builds
> and SIGILLs there.
> 
> OK?

So on armv7 there is an additional consideration.  The architecture
defines two instruction sets: A32 and T32 (Thumb).  A32 instructions
are 32-bit but T32 instructions can be 16-bit.  If an attacker can
switch the CPU into T32 mode, it will interpret this UDF instruction
as two different instructions.  We may have to consider how "bad"
these two instructions are and maybe tune that #imm16 accordingly.

> Index: lib/csu/aarch64/md_init.h
> ===
> RCS file: /cvs/src/lib/csu/aarch64/md_init.h,v
> retrieving revision 1.9
> diff -u -p -r1.9 md_init.h
> --- lib/csu/aarch64/md_init.h 15 Oct 2020 16:30:21 -  1.9
> +++ lib/csu/aarch64/md_init.h 19 Oct 2020 11:57:02 -
> @@ -115,5 +115,5 @@
>   "   svc #0  \n" \
>   "   dsb nsh \n" \
>   "   isb \n" \
> - "   .word 0xa000f7f0 /* illegal */  \n" \
> + "   udf #0  \n" \
>   ".previous");
> Index: lib/csu/arm/md_init.h
> ===
> RCS file: /cvs/src/lib/csu/arm/md_init.h,v
> retrieving revision 1.16
> diff -u -p -r1.16 md_init.h
> --- lib/csu/arm/md_init.h 15 Oct 2020 16:30:23 -  1.16
> +++ lib/csu/arm/md_init.h 19 Oct 2020 13:23:00 -
> @@ -159,5 +159,5 @@
>   "   swi #0  \n" \
>   "   dsb nsh \n" \
>   "   isb \n" \
> - "   .word 0xa000f7f0 /* illegal */  \n" \
> + "   udf #0  \n" \
>   ".previous");
> Index: lib/libc/arch/aarch64/sys/tfork_thread.S
> ===
> RCS file: /cvs/src/lib/libc/arch/aarch64/sys/tfork_thread.S,v
> retrieving revision 1.5
> diff -u -p -r1.5 tfork_thread.S
> --- lib/libc/arch/aarch64/sys/tfork_thread.S  18 Oct 2020 14:28:16 -  
> 1.5
> +++ lib/libc/arch/aarch64/sys/tfork_thread.S  19 Oct 2020 11:59:32 -
> @@ -43,6 +43,6 @@ ENTRY(__tfork_thread)
>   mov x0, x3
>   blr x2
>   SYSTRAP(__threxit)
> - .word   0xa000f7f0  /* illegal on all cpus? */
> + udf #0
>   .cfi_endproc
>  END(__tfork_thread)
> Index: lib/libc/arch/arm/sys/tfork_thread.S
> ===
> RCS file: /cvs/src/lib/libc/arch/arm/sys/tfork_thread.S,v
> retrieving revision 1.5
> diff -u -p -r1.5 tfork_thread.S
> --- lib/libc/arch/arm/sys/tfork_thread.S  18 Oct 2020 14:28:17 -  
> 1.5
> +++ lib/libc/arch/arm/sys/tfork_thread.S  19 Oct 2020 13:23:35 -
> @@ -37,5 +37,5 @@ ENTRY(__tfork_thread)
>   mov pc, r2
>   nop
>   SYSTRAP(__threxit)
> - .word   0xa000f7f0  /* illegal on all cpus? */
> + udf #0
>  END(__tfork_thread)
> Index: sys/arch/arm/arm/sigcode.S
> ===
> RCS file: /cvs/src/sys/arch/arm/arm/sigcode.S,v
> retrieving revision 1.9
> diff -u -p -r1.9 sigcode.S
> --- sys/arch/arm/arm/sigcode.S13 Mar 2020 08:46:50 -  1.9
> +++ sys/arch/arm/arm/sigcode.S19 Oct 2020 13:23:55 -
> @@ -72,7 +72,7 @@ _C_LABEL(esigcode):
>  
>   .globl  sigfill
>  sigfill:
> - .word   0xa000f7f0  /* illegal on all cpus? */
> + udf #0
>  esigfill:
>  
>   .data
> Index: sys/arch/arm64/arm64/locore.S
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/locore.S,v
> retrieving revision 1.31
> diff -u -p -r1.31 locore.S
> --- sys/arch/arm64/arm64/locore.S 13 Mar 2020 00:14:38 -  1.31
> +++ sys/arch/arm64/arm64/locore.S 19 Oct 2020 12:02:23 -
> @@ -366,7 +366,7 @@ _C_LABEL(esigcode):
>  
>   .globl  sigfill
>  sigfill:
> - .word   0xa000f7f0  /* FIXME: illegal on all cpus? */
> + udf #0
>  esigfill:
>  
>   .data
> -- 
> Christian "naddy" Weisgerber  na...@mips.inka.de
> 
> 



Re: uao_init() cleanup

2020-10-19 Thread Mark Kettenis
> Date: Mon, 19 Oct 2020 10:00:49 +0200
> From: Martin Pieuchot 
> 
> uao_init() is called from uvm_km_init() which itself is called by
> uvm_init().  None of the *init() functions in UVM have a guard, so be
> coherent and remove this one.
> 
> ok?

Yes, that seems redundant.

ok kettenis@

> Index: uvm/uvm_aobj.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.87
> diff -u -p -r1.87 uvm_aobj.c
> --- uvm/uvm_aobj.c22 Sep 2020 14:31:08 -  1.87
> +++ uvm/uvm_aobj.c13 Oct 2020 09:25:20 -
> @@ -788,12 +788,6 @@ uao_create(vsize_t size, int flags)
>  void
>  uao_init(void)
>  {
> - static int uao_initialized;
> -
> - if (uao_initialized)
> - return;
> - uao_initialized = TRUE;
> -
>   /*
>* NOTE: Pages for this pool must not come from a pageable
>* kernel map!
> 
> 



arm64: save/restore FPCR in setjmp/longjmp

2020-10-18 Thread Mark Kettenis
There are some free slots, so we can just save it directly after the
FPU registers.

This is needed to fix the lib/libc/setjmp-fpu regress on arm64.  But
it isn't enough.  Because the hardware doesn't implement the
floating-point exception trap control bits, checking those bits can't
work on arm64 (and armv7).  So the 2nd part of this diff sprinkles
some #if/#endif to skip the parts that check those bits.

ok?


Index: lib/libc/arch/aarch64/gen/_setjmp.S
===
RCS file: /cvs/src/lib/libc/arch/aarch64/gen/_setjmp.S,v
retrieving revision 1.3
diff -u -p -r1.3 _setjmp.S
--- lib/libc/arch/aarch64/gen/_setjmp.S 1 Oct 2018 22:49:50 -   1.3
+++ lib/libc/arch/aarch64/gen/_setjmp.S 18 Oct 2020 19:22:40 -
@@ -53,7 +53,9 @@ ENTRY(_setjmp)
stp d8, d9, [x0], #16
stp d10, d11, [x0], #16
stp d12, d13, [x0], #16
-   stp d14, d15, [x0]
+   stp d14, d15, [x0], #16
+   mrs x1, fpcr
+   str x1, [x0]
 #endif
 
/* Return value */
@@ -92,7 +94,9 @@ ENTRY(_longjmp)
ldp d8, d9, [x0], #16
ldp d10, d11, [x0], #16
ldp d12, d13, [x0], #16
-   ldp d14, d15, [x0]
+   ldp d14, d15, [x0], #16
+   ldr x2, [x0]
+   msr fpcr, x2
 #endif
 
/* Load the return value */
Index: lib/libc/arch/aarch64/gen/setjmp.S
===
RCS file: /cvs/src/lib/libc/arch/aarch64/gen/setjmp.S,v
retrieving revision 1.3
diff -u -p -r1.3 setjmp.S
--- lib/libc/arch/aarch64/gen/setjmp.S  1 Oct 2018 22:49:50 -   1.3
+++ lib/libc/arch/aarch64/gen/setjmp.S  18 Oct 2020 19:22:40 -
@@ -61,7 +61,9 @@ ENTRY(setjmp)
stp d8, d9, [x0], #16
stp d10, d11, [x0], #16
stp d12, d13, [x0], #16
-   stp d14, d15, [x0]
+   stp d14, d15, [x0], #16
+   mrs x1, fpcr
+   str x1, [x0]
 
/* Return value */
mov x0, #0
@@ -108,7 +110,9 @@ ENTRY(longjmp)
ldp d8, d9, [x0], #16
ldp d10, d11, [x0], #16
ldp d12, d13, [x0], #16
-   ldp d14, d15, [x0]
+   ldp d14, d15, [x0], #16
+   ldr x1, [x0]
+   msr fpcr, x1
 
/* Load the return value */
cmp w3, #0





Index: regress/lib/libc/setjmp-fpu/fpu.c
===
RCS file: /cvs/src/regress/lib/libc/setjmp-fpu/fpu.c,v
retrieving revision 1.1
diff -u -p -r1.1 fpu.c
--- regress/lib/libc/setjmp-fpu/fpu.c   16 Jan 2020 13:04:02 -  1.1
+++ regress/lib/libc/setjmp-fpu/fpu.c   18 Oct 2020 19:34:28 -
@@ -34,10 +34,12 @@ main(int argc, char *argv[])
rv = fegetround();
if (rv != FE_UPWARD)
errx(1, "fegetround returned %d, not FE_UPWARD", rv);
+#if !defined(__arm__) && !defined(__aarch64__)
rv = fegetexcept();
if (rv != FE_DIVBYZERO)
errx(1, "fegetexcept returned %d, not FE_DIVBYZERO",
rv);
+#endif
 
/* Verify that the FPU exception flags weren't clobbered. */
flag = 0;
Index: regress/lib/libc/setjmp-fpu/setjmp-fpu.c
===
RCS file: /cvs/src/regress/lib/libc/setjmp-fpu/setjmp-fpu.c,v
retrieving revision 1.5
diff -u -p -r1.5 setjmp-fpu.c
--- regress/lib/libc/setjmp-fpu/setjmp-fpu.c16 Jan 2020 13:04:02 -  
1.5
+++ regress/lib/libc/setjmp-fpu/setjmp-fpu.c18 Oct 2020 19:34:28 -
@@ -42,10 +42,12 @@ TEST_SETJMP(void)
rv = fegetround();
if (rv != FE_UPWARD)
errx(1, "fegetround returned %d, not FE_UPWARD", rv);
+#if !defined(__arm__) && !defined(__aarch64__)
rv = fegetexcept();
if (rv != FE_DIVBYZERO)
errx(1, "fegetexcept returned %d, not FE_DIVBYZERO",
rv);
+#endif
 
/* Verify that the FPU exception flags weren't clobbered. */
flag = 0;



Re: drm: avoid possible deadlock in kthread_stop

2020-10-17 Thread Mark Kettenis
> Date: Sat, 17 Oct 2020 16:16:01 +0200
> From: Sebastien Marie 
> 
> On Wed, Oct 14, 2020 at 08:58:04PM +0200, Mark Kettenis wrote:
> > > Date: Thu, 1 Oct 2020 09:09:50 +0200
> > > From: Sebastien Marie 
> > > 
> > > Hi,
> > > 
> > > Currently, when a process is calling kthread_stop(), it sets a flag
> > > asking the thread to stop, and enters in sleep mode, but the code
> > > doing the stop doesn't wakeup the caller of kthread_stop().
> > > 
> > > The thread should also be unparked as otherwise it will not see the
> > > KTHREAD_SHOULDSTOP flag. It follows what Linux is doing.
> > > 
> > > While here, I added some comments in the locking logic for park/unpark
> > > and stop.
> > > 
> > > Comments or OK ?
> > 
> > I don't think adding all those comments makes a lot of sense.  This
> > uses a fairly standard tsleep/wakeup pattern and some of the
> > comments really state the obvious.
> 
> it was the way I did to audit the code and understand what it did.
> 
> > Can you do a diff that just adds
> > the missing wakeup() and kthread_unpark() call?
> 
> here a new diff.

ok kettenis@

> diff 4efbe95c75086b3a7b0074651bfa04fd58990a98 /home/semarie/repos/openbsd/src
> blob - fd797effc74d6eb4a172c81be8feac0ed168ec5d
> file + sys/dev/pci/drm/drm_linux.c
> --- sys/dev/pci/drm/drm_linux.c
> +++ sys/dev/pci/drm/drm_linux.c
> @@ -207,6 +217,7 @@ kthread_func(void *arg)
>  
>   ret = thread->func(thread->data);
>   thread->flags |= KTHREAD_STOPPED;
> + wakeup(thread);
>   kthread_exit(ret);
>  }
>  
> @@ -298,8 +327,9 @@ kthread_stop(struct proc *p)
>  
>   while ((thread->flags & KTHREAD_STOPPED) == 0) {
>   thread->flags |= KTHREAD_SHOULDSTOP;
> + kthread_unpark(p);
>   wake_up_process(thread->proc);
>   tsleep_nsec(thread, PPAUSE, "stop", INFSLP);
>   }
>   LIST_REMOVE(thread, next);
>  
> 
> Thanks.
> -- 
> Sebastien Marie
> 



Re: uvm_grow(): serialize updates

2020-10-16 Thread Mark Kettenis
> Date: Wed, 14 Oct 2020 12:01:10 +0200
> From: Martin Pieuchot 
> 
> Getting uvm_fault() out of the KERNEL_LOCK() alone is not enough to
> reduce the contention due to page faults.  A single part of the handler
> spinning on the lock is enough to hide bugs and increase latency.  One
> recent example is the uvm_map_inentry() check.
> 
> uvm_grow() is another small function called in trap that currently needs
> the KERNEL_LOCK().  Diff below changes this requirement without removing
> the KERNEL_LOCK() yet. 
> 
> It uses the underlying vm_space lock to serialize writes to the fields
> of "struct vmspace".
> 
> While here I also documented that the reference counting is currently
> protected by the KERNEL_LOCK() and introduced a wrapper to help with
> future changes and reduce the differences with NetBSD.
> 
> Once uvm_grow() is safe to be called without the KERNEL_LOCK() MD trap
> functions can be adapted on a case-per-case basis.
> 
> Comments, Oks?

I considered the same approach of using the lock of the underlying
vm_map.  I have seen some evidence of contention on that lock, but I
don't think it is too bad (yet).  I looked at a lock-free approach as
well, but it got a bit messy.  So I think the approach is fine.
However...

> 
> Index: kern/kern_sysctl.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.379
> diff -u -p -r1.379 kern_sysctl.c
> --- kern/kern_sysctl.c1 Sep 2020 01:53:50 -   1.379
> +++ kern/kern_sysctl.c14 Oct 2020 09:35:00 -
> @@ -1783,7 +1783,7 @@ sysctl_proc_args(int *name, u_int namele
>   /* Execing - danger. */
>   if ((vpr->ps_flags & PS_INEXEC))
>   return (EBUSY);
> - 
> +
>   /* Only owner or root can get env */
>   if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
>   (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
> @@ -1792,7 +1792,7 @@ sysctl_proc_args(int *name, u_int namele
>  
>   ps_strings = vpr->ps_strings;
>   vm = vpr->ps_vmspace;
> - vm->vm_refcnt++;
> + uvmspace_addref(vm);
>   vpr = NULL;
>  
>   buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
> Index: kern/sys_process.c
> ===
> RCS file: /cvs/src/sys/kern/sys_process.c,v
> retrieving revision 1.83
> diff -u -p -r1.83 sys_process.c
> --- kern/sys_process.c16 Mar 2020 11:58:46 -  1.83
> +++ kern/sys_process.c14 Oct 2020 09:35:00 -
> @@ -850,13 +850,12 @@ process_domem(struct proc *curp, struct 
>   if ((error = process_checkioperm(curp, tr)) != 0)
>   return error;
>  
> - /* XXXCDC: how should locking work here? */
>   vm = tr->ps_vmspace;
>   if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
>   return EFAULT;
>   addr = uio->uio_offset;
>  
> - vm->vm_refcnt++;
> + uvmspace_addref(vm);
>  
>   error = uvm_io(&vm->vm_map, uio,
>   (uio->uio_rw == UIO_WRITE) ? UVM_IO_FIXPROT : 0);
> @@ -892,7 +891,7 @@ process_auxv_offset(struct proc *curp, s
>   if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
>   return EFAULT;
>  
> - vm->vm_refcnt++;
> + uvmspace_addref(vm);
>   error = uvm_io(&vm->vm_map, &uio, 0);
>   uvmspace_free(vm);
>  
> Index: uvm/uvm_extern.h
> ===
> RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
> retrieving revision 1.153
> diff -u -p -r1.153 uvm_extern.h
> --- uvm/uvm_extern.h  13 Sep 2020 10:05:25 -  1.153
> +++ uvm/uvm_extern.h  14 Oct 2020 09:35:00 -
> @@ -192,11 +192,13 @@ struct pmap;
>   * Several fields are temporary (text, data stuff).
>   *
>   *  Locks used to protect struct members in this file:
> + *   K   kernel lock
>   *   I   immutable after creation
> + *   v   vm_map's lock
>   */
>  struct vmspace {
>   struct  vm_map vm_map;  /* VM address map */
> - int vm_refcnt;  /* number of references */
> + int vm_refcnt;  /* [K] number of references */
>   caddr_t vm_shm; /* SYS5 shared memory private data XXX */
>  /* we copy from vm_startcopy to the end of the structure on fork */
>  #define vm_startcopy vm_rssize
> @@ -205,9 +207,9 @@ struct vmspace {
>   segsz_t vm_tsize;   /* text size (pages) XXX */
>   segsz_t vm_dsize;   /* data size (pages) XXX */
>   segsz_t vm_dused;   /* data segment length (pages) XXX */
> - segsz_t vm_ssize;   /* stack size (pages) */
> - caddr_t vm_taddr;   /* user virtual address of text XXX */
> - caddr_t vm_daddr;   /* user virtual address of data XXX */
> + segsz_t vm_ssize;   /* [v] stack size (pages) */
> + caddr_t vm_taddr;   /* [I] user virtual address of text */
> + caddr_t vm_daddr;   /* [I] user virtual address of data */
>   caddr_t vm_maxsaddr;/* [I] user VA at max stack growth */

Re: _exit(2), execve(2): cancel interval timers MP-safely

2020-10-15 Thread Mark Kettenis
> Date: Wed, 14 Oct 2020 20:14:18 -0500
> From: Scott Cheloha 
> 
> On Wed, Oct 14, 2020 at 08:06:52PM -0500, Scott Cheloha wrote:
> > _exit(2) and execve(2) need to obey the locking protocol described in
> > proc.h when manipulating the per-process interval timer state.
> > 
> > While we're here we can also remove the now pointless splclock/splx
> > dance from execve(2).
> > 
> > The easiest way to obey the locking protocol is to reuse the interface
> > the syscalls are using: setitimer() in kern_time.c.
> > 
> > Given that we only want to cancel the timers I wrote a small helper
> > function, cancelitimer().  I think it's tidier than putting the
> > prototype for setitimer() into sys/time.h and requiring the caller to
> > prepare an itimerval struct before calling.
> > 
> > Compare:
> > 
> > struct itimerval itv;
> > timerclear(&itv.it_value);
> > timerclear(&itv.it_interval);
> > setitimer(ITIMER_REAL, &itv, NULL);
> > 
> > with:
> > 
> > cancelitimer(ITIMER_REAL);
> > 
> > ... should I shove the for-loop into the helper function too?  Maybe
> > call it "cancel_all_itimers()"?  I have a vague feeling that showing
> > the reader that there are multiple timers is a good thing here, but
> > then again maybe I'm wrong and nobody cares.
> > 
> > Preferences?  ok?
> 
> Whoops, forgot the kern_time.c part of the diff.
> 
> Index: kern/kern_exit.c
> ===
> RCS file: /cvs/src/sys/kern/kern_exit.c,v
> retrieving revision 1.188
> diff -u -p -r1.188 kern_exit.c
> --- kern/kern_exit.c  18 Mar 2020 15:48:21 -  1.188
> +++ kern/kern_exit.c  15 Oct 2020 01:12:50 -
> @@ -194,7 +194,11 @@ exit1(struct proc *p, int xexit, int xsi
>   /* close open files and release open-file table */
>   fdfree(p);
>  
> - timeout_del(&pr->ps_realit_to);
> + /* cancel all interval timers */
> + int i;

Don't put variable definitions in the middle of a function.

> + for (i = 0; i < nitems(pr->ps_timer); i++)
> + cancelitimer(i);
> +
>   timeout_del(&pr->ps_rucheck_to);
>  #ifdef SYSVSEM
>   semexit(pr);
> Index: kern/kern_exec.c
> ===
> RCS file: /cvs/src/sys/kern/kern_exec.c,v
> retrieving revision 1.217
> diff -u -p -r1.217 kern_exec.c
> --- kern/kern_exec.c  11 Jul 2020 22:59:05 -  1.217
> +++ kern/kern_exec.c  15 Oct 2020 01:12:50 -
> @@ -656,14 +656,9 @@ sys_execve(struct proc *p, void *v, regi
>   }
>  
>   if (pr->ps_flags & PS_SUGIDEXEC) {
> - int i, s = splclock();
> -
> - timeout_del(&pr->ps_realit_to);
> - for (i = 0; i < nitems(pr->ps_timer); i++) {
> - timespecclear(&pr->ps_timer[i].it_interval);
> - timespecclear(&pr->ps_timer[i].it_value);
> - }
> - splx(s);
> + int i;

You should put a blank line here to separate definitions from the code.

> + for (i = 0; i < nitems(pr->ps_timer); i++)
> + cancelitimer(i);
>   }
>  
>   /* reset CPU time usage for the thread, but not the process */
> Index: kern/kern_time.c
> ===
> RCS file: /cvs/src/sys/kern/kern_time.c,v
> retrieving revision 1.146
> diff -u -p -r1.146 kern_time.c
> --- kern/kern_time.c  13 Oct 2020 17:33:39 -  1.146
> +++ kern/kern_time.c  15 Oct 2020 01:12:50 -
> @@ -572,6 +572,16 @@ setitimer(int which, const struct itimer
>   }
>  }
>  
> +void
> +cancelitimer(int which)
> +{
> + struct itimerval itv;
> +
> + timerclear(&itv.it_value);
> + timerclear(&itv.it_interval);
> + setitimer(which, &itv, NULL);
> +}
> +
>  int
>  sys_getitimer(struct proc *p, void *v, register_t *retval)
>  {
> Index: sys/time.h
> ===
> RCS file: /cvs/src/sys/sys/time.h,v
> retrieving revision 1.55
> diff -u -p -r1.55 time.h
> --- sys/time.h6 Jul 2020 13:33:09 -   1.55
> +++ sys/time.h15 Oct 2020 01:12:50 -
> @@ -307,6 +307,7 @@ time_tgetuptime(void);
>  struct proc;
>  int  clock_gettime(struct proc *, clockid_t, struct timespec *);
>  
> +void cancelitimer(int);
>  int  itimerfix(struct timeval *);
>  int  itimerdecr(struct itimerspec *, long);
>  int  settime(const struct timespec *);
> 
> 



Re: Lenovo X1 gen 8 touchpad interrupt: pchgpio(4)

2020-10-14 Thread Mark Kettenis
> From: James Hastings 
> Date: Sun, 11 Oct 2020 03:49:11 -0400 (EDT)
> 
> On Thu, 08 Oct 2020 20:29:38 + Mark Kettenis wrote:
> > Diff below adds a driver for the GPIO controller found on the Intel
> > 400 Series PCH as found on (for example) the Lenovo X1 gen 8 laptop.
> > Since I don't have such hardware, I'd appreciate some tests on laptops
> > that currently show:
> > 
> > "INT34BB" at acpi0 not configured
> > 
> 
> Thanks for the driver Mark! Compiles fine here but panics like this:
> ihidev0 at iic0 addr 0x2c gpio 291panic: kernel diagnostic assertion "pin >= 
> 0 && pin < sc->sc_npins" failed: file "/usr/src/sys/dev/acpi/pchgpio.c", line 
> 335
> 
> Let me know any way I can help.

Can you figure out what pin number it is trying to use?

Thanks,

Mark

P.S. Feel free to finish the driver yourself if you have time.  This
 sort of thing is way easier if you have the hardware.  The
 hardware itself should be very similar to aplgpio(4).  It's just
 that the registers moved around a bit and there is a single ACPI
 device for all the pin "communities" instead of the model of
 separate ACPI devices for each community that aplgpio(4) uses.



Re: drm: avoid possible deadlock in kthread_stop

2020-10-14 Thread Mark Kettenis
> Date: Thu, 1 Oct 2020 09:09:50 +0200
> From: Sebastien Marie 
> 
> Hi,
> 
> Currently, when a process is calling kthread_stop(), it sets a flag
> asking the thread to stop, and enters in sleep mode, but the code
> doing the stop doesn't wakeup the caller of kthread_stop().
> 
> The thread should also be unparked, as otherwise it will not see the
> KTHREAD_SHOULDSTOP flag. This follows what Linux is doing.
> 
> While here, I added some comments in the locking logic for park/unpark
> and stop.
> 
> Comments or OK ?

I don't think adding all those comments makes a lot of sense.  This
uses a fairly standard tsleep/wakeup pattern and some of the
comments really state the obvious.  Can you do a diff that just adds
the missing wakeup() and kthread_unpark() call?

> ---
> commit 70e71461c8598e28820f1743923cac40670f7c33
> from: Sébastien Marie 
> date: Thu Oct  1 07:02:46 2020 UTC
>  
>  properly support kthread_stop()
>  - wakeup kthread_stop() caller
>  - unpark the thread if parked
>  
>  while here, add comments for locking logic for park/unpark/stop
>  
> diff ec329a4429e2542bc24dd017b8001b22df43564c 
> ce2b5031503711bbdd7a3067c76c4f18b1d8da82
> blob - 2cbd0905406ccc9d89c86cee38673a4e9c3fcf42
> blob + f0e5a5a1b282c071c97505556510952ee7a6282a
> --- sys/dev/pci/drm/drm_linux.c
> +++ sys/dev/pci/drm/drm_linux.c
> @@ -206,6 +206,10 @@ kthread_func(void *arg)
>  
>   ret = thread->func(thread->data);
>   thread->flags |= KTHREAD_STOPPED;
> +
> + /* wakeup thread waiting in kthread_stop() */
> + wakeup(thread);
> +
>   kthread_exit(ret);
>  }
>  
> @@ -256,7 +260,14 @@ kthread_parkme(void)
>  
>   while (thread->flags & KTHREAD_SHOULDPARK) {
>   thread->flags |= KTHREAD_PARKED;
> +
> + /* 
> +  * wakeup kthread_park() caller
> +  * to signal I am parked as asked.
> +  */
>   wakeup(thread);
> +
> + /* wait for someone to kthread_unpark() me */
>   tsleep_nsec(thread, PPAUSE, "parkme", INFSLP);
>   thread->flags &= ~KTHREAD_PARKED;
>   }
> @@ -269,7 +280,13 @@ kthread_park(struct proc *p)
>  
>   while ((thread->flags & KTHREAD_PARKED) == 0) {
>   thread->flags |= KTHREAD_SHOULDPARK;
> +
>   wake_up_process(thread->proc);
> +
> + /*
> +  * wait for thread to be parked.
> +  * the asked thread should call kthread_parkme()
> +  */
>   tsleep_nsec(thread, PPAUSE, "park", INFSLP);
>   }
>  }
> @@ -280,6 +297,8 @@ kthread_unpark(struct proc *p)
>   struct kthread *thread = kthread_lookup(p);
>  
>   thread->flags &= ~KTHREAD_SHOULDPARK;
> +
> + /* wakeup kthread_parkme() caller */
>   wakeup(thread);
>  }
>  
> @@ -297,7 +316,13 @@ kthread_stop(struct proc *p)
>  
>   while ((thread->flags & KTHREAD_STOPPED) == 0) {
>   thread->flags |= KTHREAD_SHOULDSTOP;
> +
> + /* kthread_unpark() the thread if parked */
> + kthread_unpark(p);
> +
>   wake_up_process(thread->proc);
> + 
> + /* wait for thread to stop (func() should return) */
>   tsleep_nsec(thread, PPAUSE, "stop", INFSLP);
>   }
>   LIST_REMOVE(thread, next);
> 
> 
> 



Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

2020-10-13 Thread Mark Kettenis
> Date: Thu, 8 Oct 2020 00:34:28 -0500
> From: Jordan Hargrave 
> 
> Ok updated the new changes.

I think this is good enough for further cleanup in the tree now.
Builds all the amd64 kernels, doesn't break i386 and arm64 GENERIC.MP.

However, I think acpidmar(4) shouldn't be enabled yet until we have done
a bit more testing.  Theo, do we want to compile in the driver such
that people can easily flip it on in UKC?

I also noticed that re(4) does a bad DMA transfer as soon as the
interface is brought up. With your diff the kernel doesn't panic if
that happens; it just spits out some debugging information.  We may
want to change that in the future.

ok kettenis@ once the question about enabling acpidmar(4) is resolved.

> 
> On Mon, Oct 05, 2020 at 09:54:02PM +0200, Mark Kettenis wrote:
> > > Date: Thu, 17 Sep 2020 20:54:51 -0500
> > > From: Jordan Hargrave 
> > > Cc: ma...@peereboom.org, kette...@openbsd.org, tech@openbsd.org,
> > > d...@openbsd.org, j...@openbsd.org
> > > Content-Type: text/plain; charset=us-ascii
> > > Content-Disposition: inline
> > > 
> > > Ok made more changes
> > > 
> > > > 
> > > > Should be handled by that activate function as well.
> > > >
> > 
> > So there are quite a few style issues.  I can point them out to you,
> > or I could fix them after this is committed, which is probably more
> > efficient.
> > 
> > Also, there seems to be a lot of debug code left in here that should be
> > removed or at least hidden before this gets committed.
> > 
> > > 
> > > diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
> > > index 1d6397391..a69c72c26 100644
> > > --- a/sys/arch/amd64/conf/GENERIC
> > > +++ b/sys/arch/amd64/conf/GENERIC
> > > @@ -45,6 +45,7 @@ acpibtn*at acpi?
> > >  acpicpu* at acpi?
> > >  acpicmos*at acpi?
> > >  acpidock*at acpi?
> > > +acpidmar0at acpi?
> > >  acpiec*  at acpi?
> > >  acpipci* at acpi?
> > >  acpiprt* at acpi?
> > > diff --git a/sys/arch/amd64/include/pci_machdep.h 
> > > b/sys/arch/amd64/include/pci_machdep.h
> > > index bc295cc22..ea09f1abc 100644
> > > --- a/sys/arch/amd64/include/pci_machdep.h
> > > +++ b/sys/arch/amd64/include/pci_machdep.h
> > > @@ -91,7 +91,12 @@ void   
> > > *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t,
> > >   int, struct cpu_info *,
> > >   int (*)(void *), void *, const char *);
> > >  void pci_intr_disestablish(pci_chipset_tag_t, void *);
> > > +#if NACPIDMAR > 0
> > > +int  pci_probe_device_hook(pci_chipset_tag_t,
> > > + struct pci_attach_args *);
> > > +#else
> > >  #define  pci_probe_device_hook(c, a) (0)
> > > +#endif
> > 
> > This is probably a bad idea.  You don't include "acpidmar.h" in this
> > file, and doing so is probably undesirable.  But that means the
> > definition of the hook function depends on whether the file that
> > includes this does that or not.
> > 
> > Better just unconditionally provide the prototype and use a #if
> > NACPIDMAR > 0 in the implementation.
> >
> 
> Ok changed to that method
> 
> > > +#include "acpidmar.h"
> > > +#include "amd_iommu.h"
> > > +
> > > +//#define IOMMU_DEBUG
> > 
> > No C++-style comments please.  Make this an #undef or use /* */.
> > 
> > > +
> > > +#ifdef IOMMU_DEBUG
> > > +#define dprintf(x...) printf(x)
> > > +#else
> > > +#define dprintf(x...)
> > > +#endif
> > > +
> > > +#ifdef DDB
> > > +int  acpidmar_ddb = 0;
> > > +#endif
> > > +
> > > +int  intel_iommu_gfx_mapped = 0;
> > 
> > Unused variable.
> > 
> > > +int  force_cm = 1;
> > 
> > Rename to "acpidmar_force_cm"?
> > 
> > > +
> > > +void showahci(void *);
> > 
> > Unused prototype.
> > 
> > > +
> > > +/* Page Table Entry per domain */
> > > +struct iommu_softc;
> > > +
> > > +static inline int
> > > +mksid(int b, int d, int f)
> > > +{
> > > + return (b << 8) + (d << 3) + f;
> > > +}
> > > +
> > > +static inline int
> > > +sid_devfn(int sid)
> > > +{
> > > + return sid & 0xff;
> > > +}
> > > +
> > &g

powerpc ld.lld fix

2020-10-10 Thread Mark Kettenis
On powerpc with the secure-plt ABI we need a .got section, even if the
_GLOBAL_OFFSET_TABLE_ symbol isn't referenced.  This is needed because
the first three entries of the GOT are used by the dynamic linker.

With this fix I can build executables of all flavours (including
-static/-nopie).

ok?


Index: gnu/llvm/lld/ELF/SyntheticSections.cpp
===
RCS file: /cvs/src/gnu/llvm/lld/ELF/SyntheticSections.cpp,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 SyntheticSections.cpp
--- gnu/llvm/lld/ELF/SyntheticSections.cpp  3 Aug 2020 14:32:29 -   
1.1.1.1
+++ gnu/llvm/lld/ELF/SyntheticSections.cpp  10 Oct 2020 21:13:59 -
@@ -604,6 +604,8 @@ GotSection::GotSection()
   // ElfSym::globalOffsetTable.
   if (ElfSym::globalOffsetTable && !target->gotBaseSymInGotPlt)
 numEntries += target->gotHeaderEntriesNum;
+  else if (config->emachine == EM_PPC)
+numEntries += target->gotHeaderEntriesNum;
 }
 
 void GotSection::addEntry(Symbol ) {



Re: tree.h: returning void, legal but weird

2020-10-10 Thread Mark Kettenis
> Date: Sat, 10 Oct 2020 18:37:50 +0200
> From: Otto Moerbeek 
> 
> OK?

ok kettenis@

> Index: tree.h
> ===
> RCS file: /cvs/src/sys/sys/tree.h,v
> retrieving revision 1.29
> diff -u -p -r1.29 tree.h
> --- tree.h30 Jul 2017 19:27:20 -  1.29
> +++ tree.h10 Oct 2020 16:36:15 -
> @@ -910,25 +910,25 @@ _name##_RBT_PARENT(struct _type *elm)   
>  __unused static inline void  \
>  _name##_RBT_SET_LEFT(struct _type *elm, struct _type *left)  \
>  {\
> - return _rb_set_left(_name##_RBT_TYPE, elm, left);   \
> + _rb_set_left(_name##_RBT_TYPE, elm, left);  \
>  }\
>   \
>  __unused static inline void  \
>  _name##_RBT_SET_RIGHT(struct _type *elm, struct _type *right)
> \
>  {\
> - return _rb_set_right(_name##_RBT_TYPE, elm, right); \
> + _rb_set_right(_name##_RBT_TYPE, elm, right);\
>  }\
>   \
>  __unused static inline void  \
>  _name##_RBT_SET_PARENT(struct _type *elm, struct _type *parent)  
> \
>  {\
> - return _rb_set_parent(_name##_RBT_TYPE, elm, parent);   \
> + _rb_set_parent(_name##_RBT_TYPE, elm, parent);  \
>  }\
>   \
>  __unused static inline void  \
>  _name##_RBT_POISON(struct _type *elm, unsigned long poison)  \
>  {\
> - return _rb_poison(_name##_RBT_TYPE, elm, poison);   \
> + _rb_poison(_name##_RBT_TYPE, elm, poison);  \
>  }\
>   \
>  __unused static inline int   \
> 
> 



Re: Lenovo X1 gen 8 touchpad interrupt: pchgpio(4)

2020-10-10 Thread Mark Kettenis
You initially misapplied the patch (probably by being in the wrong
directory) but fixed that later.  Given the dmesg output, you ended up
with something that builds as intended.

Unfortunately the driver isn't good enough, so I'll have to go back
and see if I can figure out what's going wrong.  Before I do that, can
you send me the output of "vmstat -zi" for a kernel with my diff applied?

Thanks,

Mark



Re: timeout(9): add clock-based timeouts (attempt 2)

2020-10-09 Thread Mark Kettenis
> From: Scott Cheloha 
> Date: Fri, 9 Oct 2020 13:03:05 -0500
> 
> Hey,
> 
> > On Oct 7, 2020, at 8:49 PM, 内藤 祐一郎  wrote:
> > 
> > Hi.
> > 
> > I'm looking forward to this patch is committed.
> > Because this patch solves my problem about CARP timeout.
> > 
> > IIJ, a company that I am working for, is using carp(4) on VMware ESXi hosts
> > for VPN and web gateway services.
> > 
> > One is master and the other is backup of carp(4).
> > Active host sometimes failover to backup when the ESXi host gets high cpu 
> > usage.
> > And also CPU ready of OpenBSD machine seems high average on ESXi monitor.
> > 
> > High CPU ready machine delays sending carp advertisement for 3 or 4 seconds.
> > It is enough to failover to backup.
> > 
> > In my investigation, OpenBSD machine does not always get CPU under high CPU 
> > ready condition.
> > Although it is needed for interrupt handler.
> > The delay of calling hardclock() causes tick count up delay.
> > One delay is small but will never be resolved.
> > So total delay can reach 3 or 4 seconds while tick counts up to 100.
> > The tickless patch can solve the delay.
> > 
> > I have tried to adapt in_carp.c to the tickless attempt 2.
> > Delay of carp advertisement reduced to about 2 seconds.
> 
> I'm glad to hear it improves things.  Thanks for testing it out.
> 
> >> 2020/09/09 4:00、Mark Kettenis のメール:
> >> The diff looks reasonable to me, but I'd like to discuss the path
> >> forward with some people during the hackathon next week.
> > 
> > Is there any discussion in the hackathon?
> 
> Not that I heard.  I wasn't at the hackathon, though.
> 
> --
> 
> If I get an OK from someone I will commit what I have so far.
> 
> Where do we stand?
> 
> - The nitty gritty details in this commit -- the hashing,
>   the loops, and the basic algorithm -- haven't changed
>   in almost a year.  I'm confident they work.
> 
> - The commit itself doesn't change any behavior because no
>   existing timeouts are converted to use timeout_set_kclock().
>   So we shouldn't see any regressions like last time until
>   someone deliberately changes an existing timeout to use the
>   kclock interfaces.
> 
> The thing that needs to be decided is how to go about dragging
> the rest of the tree into using the kclock timeout interfaces.
> 
> - Should we keep a tick-based timeout interface?  If so,
>   for how long?  Linux kept theirs as a distinct interface.
>   FreeBSD discarded theirs.
> 
> - Should we quietly reimplement timeout_add_sec(9), etc.,
>   in terms of kclock timeouts or should we do a full-tree
>   API change to explicitly use timeout_in_nsec()?
> 
> I don't think we can make such decisions without putting kclock
> timeouts into the tree so people can use them.
> 
> So, are you OK with this as-is?
> 
> Anybody else?

I think this is good to go.  ok kettenis@

Did briefly discuss with Theo during k2k20 and the consensus was it
should go in after release.  Which is now!



Lenovo X1 gen 8 touchpad interrupt: pchgpio(4)

2020-10-08 Thread Mark Kettenis
Diff below adds a driver for the GPIO controller found on the Intel
400 Series PCH as found on (for example) the Lenovo X1 gen 8 laptop.
Since I don't have such hardware, I'd appreciate some tests on laptops
that currently show:

"INT34BB" at acpi0 not configured

in their dmesg.

To test, apply the diff, and do

$ cd /usr/src/sys/arch/amd64/compile/GENERIC.MP
$ make config
$ make

It is probably wise to make a backup of your old kernel before installing
the new one, just in case the driver doesn't work properly.


Index: arch/amd64/conf/GENERIC
===
RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v
retrieving revision 1.493
diff -u -p -r1.493 GENERIC
--- arch/amd64/conf/GENERIC 15 Sep 2020 18:31:14 -  1.493
+++ arch/amd64/conf/GENERIC 8 Oct 2020 20:21:54 -
@@ -65,6 +65,7 @@ aplgpio*  at acpi?
 bytgpio*   at acpi?
 chvgpio*   at acpi?
 glkgpio*   at acpi?
+pchgpio*   at acpi?
 sdhc*  at acpi?
 acpicbkbd* at acpi?
 acpials*   at acpi?
Index: dev/acpi/files.acpi
===
RCS file: /cvs/src/sys/dev/acpi/files.acpi,v
retrieving revision 1.57
diff -u -p -r1.57 files.acpi
--- dev/acpi/files.acpi 15 Sep 2020 18:31:14 -  1.57
+++ dev/acpi/files.acpi 8 Oct 2020 20:21:56 -
@@ -146,6 +146,11 @@ device glkgpio
 attach glkgpio at acpi
 file   dev/acpi/glkgpio.c  glkgpio
 
+# Intel PCH GPIO
+device pchgpio
+attach pchgpio at acpi
+file   dev/acpi/pchgpio.c  pchgpio
+
 # "Intel" Dollar Cove TI PMIC
 device tipmic
 attach tipmic at i2c
Index: dev/acpi/pchgpio.c
===
RCS file: dev/acpi/pchgpio.c
diff -N dev/acpi/pchgpio.c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ dev/acpi/pchgpio.c  8 Oct 2020 20:21:56 -
@@ -0,0 +1,395 @@
+/* $OpenBSD$   */
+/*
+ * Copyright (c) 2020 Mark Kettenis
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PCHGPIO_MAXCOM 4
+
+#define PCHGPIO_CONF_TXSTATE   0x0001
+#define PCHGPIO_CONF_RXSTATE   0x0002
+#define PCHGPIO_CONF_RXINV 0x0080
+#define PCHGPIO_CONF_RXEV_EDGE 0x0200
+#define PCHGPIO_CONF_RXEV_ZERO 0x0400
+#define PCHGPIO_CONF_RXEV_MASK 0x0600
+
+#define PCHGPIO_PADBAR 0x00c
+
+struct pchgpio_group {
+   uint8_t bar;
+   uint16_tbase;
+   uint16_tlimit;
+   uint16_toffset;
+};
+
+struct pchgpio_device {
+   uint16_tpad_own;
+   uint16_tpadcfglock;
+   uint16_thostsw_own;
+   uint16_tgpi_is;
+   uint16_tgpi_ie;
+   struct pchgpio_group *groups;
+   int ngroups;
+};
+
+struct pchgpio_match {
+   const char  *hid;
+   struct pchgpio_device *device;
+};
+
+struct pchgpio_intrhand {
+   int (*ih_func)(void *);
+   void *ih_arg;
+};
+
+struct pchgpio_softc {
+   struct device sc_dev;
+   struct acpi_softc *sc_acpi;
+   struct aml_node *sc_node;
+
+   bus_space_tag_t sc_memt[PCHGPIO_MAXCOM];
+   bus_space_handle_t sc_memh[PCHGPIO_MAXCOM];
+   void *sc_ih;
+   int sc_naddr;
+
+   struct pchgpio_device *sc_device;
+   uint16_t sc_padbar[PCHGPIO_MAXCOM];
+
+   int sc_npins;
+   struct pchgpio_intrhand *sc_pin_ih;
+
+   struct acpi_gpio sc_gpio;
+};
+
+intpchgpio_match(struct device *, void *, void *);
+void   pchgpio_attach(struct device *, struct device *, void *);
+
+struct cfattach pchgpio_ca = {
+   sizeof(struct pchgpio_softc), pchgpio_match, pchgpio_attach
+};
+
+struct cfdriver pchgpio_cd = {
+   NULL, "pchgpio", DV_DULL
+};
+
+const char *pchgpio_hids[] = {
+   "INT344B",
+   "INT34BB",
+   NULL
+};
+
+struct pchgpio_group spt_lp_groups[] =
+{
+   /* Community 0 */
+   { 0, 0, 23, 0 },/* GPP_A */
+   { 0, 24, 47, 32 },  /* GPP_B */
+
+   /* Community 1 */
+   { 1, 48, 71, 0 },   /* GPP_C */
+   { 1, 72, 95, 32 },  /* GPP_D */
+   { 1, 96, 119, 64 }, /* G

Re: diff: remove dead code in a{rm,md}64/a{rm,md}/conf.c

2020-10-07 Thread Mark Kettenis
> Date: Wed, 7 Oct 2020 15:14:32 +0200
> From: Jan Klemkow 
> 
> Hi,
> 
> the cdev_joy_init macro is just used in i386.
> 
> OK?

ok kettenis@

> Index: amd64/amd64/conf.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/conf.c,v
> retrieving revision 1.71
> diff -u -p -r1.71 conf.c
> --- amd64/amd64/conf.c6 Jul 2020 04:32:25 -   1.71
> +++ amd64/amd64/conf.c7 Oct 2020 13:10:57 -
> @@ -75,13 +75,6 @@ struct bdevsw  bdevsw[] =
>  };
>  int  nblkdev = nitems(bdevsw);
>  
> -/* open, close, read, ioctl */
> -#define cdev_joy_init(c,n) { \
> - dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \
> - (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
> - (dev_type_stop((*))) enodev, 0, seltrue, \
> - (dev_type_mmap((*))) enodev, 0, 0, seltrue_kqfilter }
> -
>  /* open, close, ioctl */
>  #define cdev_ocis_init(c,n) { \
>  dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) 
> enodev, \
> Index: arm64/arm64/conf.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/conf.c,v
> retrieving revision 1.15
> diff -u -p -r1.15 conf.c
> --- arm64/arm64/conf.c6 Jul 2020 04:32:25 -   1.15
> +++ arm64/arm64/conf.c7 Oct 2020 13:10:57 -
> @@ -72,13 +72,6 @@ struct bdevsw  bdevsw[] =
>  };
>  int  nblkdev = nitems(bdevsw);
>  
> -/* open, close, read, ioctl */
> -#define cdev_joy_init(c,n) { \
> - dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \
> - (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
> - (dev_type_stop((*))) enodev, 0, seltrue, \
> - (dev_type_mmap((*))) enodev, 0, 0, seltrue_kqfilter }
> -
>  /* open, close, ioctl, select -- XXX should be a generic device */
>  #define cdev_ocis_init(c,n) { \
>  dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) 
> enodev, \
> 
> 



Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

2020-10-05 Thread Mark Kettenis
> Date: Thu, 17 Sep 2020 20:54:51 -0500
> From: Jordan Hargrave 
> Cc: ma...@peereboom.org, kette...@openbsd.org, tech@openbsd.org,
> d...@openbsd.org, j...@openbsd.org
> Content-Type: text/plain; charset=us-ascii
> Content-Disposition: inline
> 
> Ok made more changes
> 
> On Mon, Sep 14, 2020 at 08:19:18PM +0200, Mark Kettenis wrote:
> > > Date: Tue, 8 Sep 2020 21:43:39 -0500
> > > From: Jordan Hargrave 
> > > 
> > > Made changes for the iommu_readq -> iommu_read_8 and also now
> > > dynamically allocate the hwdte for AMD IOMMU.
> > 
> > Some more bits...
> > 
> > > On Fri, Sep 04, 2020 at 09:17:18PM +0200, Mark Kettenis wrote:
> > > > > Date: Fri, 4 Sep 2020 00:50:44 -0500
> > > > > From: Jordan Hargrave 
> > > > 
> > > > A few hints below...
> > > > 
> > > > > > > +
> > > > > > > +/* Page Table Entry per domain */
> > > > > > > +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> > > > > > > +
> > > > > > > +/* Alias mapping */
> > > > > > > +#define SID_INVALID 0x8000L
> > > > > > > +static uint32_t sid_flag[65536];
> > > > > > 
> > > > > > Can we avoid having these large arrays, or at least allocate them
> > > > > > dynamically?  That would also avoid the explicit alignment which is
> > > > > > somewhat nasty since it affects the entire kernel.
> > > > > 
> > > > > OK. But the hwdte does need the 2M area to be all contiguous but it 
> > > > > is not
> > > > > needed for DMAR/Intel.  You *can* have up to 8 different device table 
> > > > > entries
> > > > > though to split up the area.
> > > > 
> > > > The appropriate interface to use in this context is
> > > > bus_dmamem_alloc(9).  You can specify alignment, and if you set nsegs
> > > > to 1, you will get memory that is physically contiguous.
> > > > 
> > > > To map the memory into kernel address space you'll need to create a map
> > > > using bus_dmamap_create(9) and map it using bus_dmamem_map(9).  Then
> > > > instead of using pmap_extract(9) you use bus_dmamap_load_raw(9) which
> > > > then populates the physical addresses.
> > > > 
> > > > Many of the drivers written by dlg@ define convenience functions to do
> > > > all these steps, although interestingly enough he tends to use
> > > > bus_dmamap_load(9) instead of bus_dmamap_load_raw(9) which is
> > > > sub-optimal.
> > > > 
> > > > > > > +
> > > > > > > +struct domain_dev {
> > > > > > > + int sid;
> > > > > > > + int sec;
> > > > > > > + int sub;
> > > > > > > + TAILQ_ENTRY(domain_dev) link;
> > > > > > > +};
> > > > > > > +
> > > > > > > +struct domain {
> > > > > > > + struct iommu_softc  *iommu;
> > > > > > > + int did;
> > > > > > > + int gaw;
> > > > > > > + struct pte_entry*pte;
> > > > > > > + paddr_t ptep;
> > > > > > > + struct bus_dma_tag  dmat;
> > > > > > > + int flag;
> > > > > > > +
> > > > > > > + struct mutexexlck;
> > > > > > > + charexname[32];
> > > > > > > + struct extent   *iovamap;
> > > > > > > + TAILQ_HEAD(,domain_dev) devices;
> > > > > > > + TAILQ_ENTRY(domain) link;
> > > > > > > +};
> > > > > > > +
> > > > > > > +#define DOM_DEBUG 0x1
> > > > > > > +#define DOM_NOMAP 0x2
> > > > > > > +
> > > > > > > +struct dmar_devlist {
> > > > > > > + int type;
> > > > > > > + int bus;
> > > > > > > + int ndp;
> > > > > > > + struct acpidmar_devpath *dp;
> > > > > > > + TAILQ_ENTRY(dmar_devlist)   link;
> > > > > >

Re: mmap: Do not push KERNEL_LOCK() too far

2020-10-05 Thread Mark Kettenis
> Date: Mon, 5 Oct 2020 11:25:39 +0200
> From: Martin Pieuchot 
> 
> On 03/10/20(Sat) 12:59, Mark Kettenis wrote:
> > > Date: Fri, 2 Oct 2020 10:32:27 +0200
> > > From: Martin Pieuchot 
> > > 
> > > On 01/10/20(Thu) 21:44, Mark Kettenis wrote:
> > > > > Date: Thu, 1 Oct 2020 14:10:56 +0200
> > > > > From: Martin Pieuchot 
> > > > > 
> > > > > While studying a bug report from naddy@ in 2017 when testing 
> > > > > guenther@'s
> > > > > amap/anon locking diff I figured out that we have been too optimistic 
> > > > > in
> > > > > the !MAP_ANON case.
> > > > > 
> > > > > The reported panic involves, I'd guess, a race between fd_getfile() 
> > > > > and
> > > > > vref():
> > > > > 
> > > > >   panic: vref used where vget required
> > > > >   db_enter() at db_enter+0x5
> > > > >   panic() at panic+0x129
> > > > >   vref(ff03b20d29e8) at vref+0x5d
> > > > >   uvn_attach(101,ff03a5879dc0) at uvn_attach+0x11d
> > > > >   uvm_mmapfile(7,ff03a5879dc0,2,1,13,10012) at 
> > > > > uvm_mmapfile+0x12c
> > > > >   sys_mmap(c50,8000225f82a0,1) at sys_mmap+0x604
> > > > >   syscall() at syscall+0x279
> > > > >   --- syscall (number 198) ---
> > > > >   end of kernel
> > > > > 
> > > > > Removing the KERNEL_LOCK() from file mapping was out of the scope of 
> > > > > this
> > > > > previous work, so I'd like to go back to a single KERNEL_LOCK/UNLOCK 
> > > > > dance
> > > > > in this code path to remove any false positive.
> > > > > 
> > > > > Note that this code is currently always run under KERNEL_LOCK() so 
> > > > > this
> > > > > will only have effect once the syscall will be unlocked.
> > > > > 
> > > > > ok?
> > > > 
> > > > Hmm, I thought fd_getfile() was fully mpsafe.
> > > 
> > > It is to get a reference on `fp'.  However if the current thread
> > > releases the KERNEL_LOCK() before calling vref(9) it might lose a
> > > race.
> > 
> > I don't see the race.  The function returns a 'fp' with a reference,
> > so 'fp' will be valid regardless of whether we hold the kernel lock or
> > not.  So we should be able to take the kernel lock after the
> > fd_getfile() call isn't it?
> 
> Should we?  I'd assume we can't unless somebody can explain the
> contrary.  My point is the following: by releasing the KERNEL_LOCK() we
> allow other parts of the kernel: syscalls and fault handlers to mess
> with this vnode.
> 
> > > > But I suppose the kernel lock needs to be grabbed before we start
> > > > looking at the vnode?
> > > 
> > > Yes, or to say it otherwise not released.
> > 
> > So the problem is that while we have an 'fp', its f_data member points
> > to a vnode that has already been put on the freelist and therefore has
> > v_usecount set to zero?  How does that happen?
> 
> I don't know.  I'm trying to be conservative to be able to concentrate
> on amaps & anons.  I'd rather keep all the rest under a single
> KERNEL_LOCK().
> 
> Hopefully this can be revisited soon.

Fair enough.  ok kettenis@

> > > > Your diff makes the locking a bit convoluted, but I suppose adding a
> > > > KERNEL_UNLOCK() before every "goto out" is worse?
> > > 
> > > I tried to keep the diff as small as possible to not obfuscate the change.
> > > If we want cleaner code we can move the !ANON case in a different 
> > > function.
> > 
> > Splitting would be hard because of the "goto is_anon".
> > 
> > > > > Index: uvm/uvm_mmap.c
> > > > > ===
> > > > > RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v
> > > > > retrieving revision 1.161
> > > > > diff -u -p -r1.161 uvm_mmap.c
> > > > > --- uvm/uvm_mmap.c4 Mar 2020 21:15:39 -   1.161
> > > > > +++ uvm/uvm_mmap.c28 Sep 2020 09:48:26 -
> > > > > @@ -288,8 +288,11 @@ sys_mmap(struct proc *p, void *v, regist
> > > > >  
> > > > >   /* check for file mappings (i.e. not anonymous) and verify 
> > > > > file. */
> > > > > 

arm64 dt(4) improvements

2020-10-03 Thread Mark Kettenis
Diff below has two dt(4) improvements.  It fixes the construction of
the call frame that accompanies the stack frame.  Currently it sticks
the saved copy of the LR register in there.  But instead it should
stick the value of the ELR in there to record the location where the
trap happened.  This fixes stack traces through traps.

Then it adds the magic defines to strip off the frames that aren't
interesting.

ok?


Index: dev/dt/dt_dev.c
===
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
retrieving revision 1.10
diff -u -p -r1.10 dt_dev.c
--- dev/dt/dt_dev.c 28 Sep 2020 13:16:58 -  1.10
+++ dev/dt/dt_dev.c 3 Oct 2020 17:54:20 -
@@ -56,6 +56,9 @@
 #if defined(__amd64__)
 #define DT_FA_PROFILE  5
 #define DT_FA_STATIC   2
+#elif defined(__arm64__)
+#define DT_FA_PROFILE  7
+#define DT_FA_STATIC   2
 #elif defined(__powerpc64__)
 #define DT_FA_PROFILE  6
 #define DT_FA_STATIC   2
Index: arch/arm64/arm64/exception.S
===
RCS file: /cvs/src/sys/arch/arm64/arm64/exception.S,v
retrieving revision 1.11
diff -u -p -r1.11 exception.S
--- arch/arm64/arm64/exception.S17 Mar 2020 17:27:12 -  1.11
+++ arch/arm64/arm64/exception.S3 Oct 2020 17:54:21 -
@@ -38,7 +38,6 @@
sub sp, sp, #128
 .endif
sub sp, sp, #(TF_SIZE + 16)
-   stp x29, x30, [sp, #(TF_SIZE)]
stp x28, x29, [sp, #(TF_X + 28 * 8)]
stp x26, x27, [sp, #(TF_X + 26 * 8)]
stp x24, x25, [sp, #(TF_X + 24 * 8)]
@@ -60,6 +59,7 @@
mrs x18, sp_el0
 .endif
mov fp, x18
+   stp x29, x10, [sp, #(TF_SIZE)]
stp x10, x11, [sp, #(TF_ELR)]
stp x18, lr, [sp, #(TF_SP)]
mrs x18, tpidr_el1



Support astfb(4) in wsfb(4)

2020-10-03 Thread Mark Kettenis
The astfb(4) is a little-endian framebuffer on a (for now) big-endian
architecture.  Therefore we need to tell X that the pixels have their
color components laid out in a non-standard way.

Note that support for this pixel layout in X is weak.  Normal stuff
works but the software rendering in Mesa doesn't seem to work
properly.  So while this is good enough to get a bunch of xterms on
the screen, glxgears will have the wrong colors.

ok?

P.S. I don't think basing on the wsdisplay type is the right thing to
 do, but it is what we have done in the past.  Maybe we should
 extend wsdisplay_fbinfo with some fields that communicate the
 pixel format and use that?


Index: driver/xf86-video-wsfb/src/wsfb_driver.c
===
RCS file: /cvs/xenocara/driver/xf86-video-wsfb/src/wsfb_driver.c,v
retrieving revision 1.38
diff -u -p -r1.38 wsfb_driver.c
--- driver/xf86-video-wsfb/src/wsfb_driver.c27 Jul 2019 07:48:19 -  
1.38
+++ driver/xf86-video-wsfb/src/wsfb_driver.c3 Oct 2020 14:39:18 -
@@ -632,6 +632,17 @@ WsfbPreInit(ScrnInfoPtr pScrn, int flags
masks.blue = 0x1f;
}
break;
+   case WSDISPLAY_TYPE_ASTFB:
+   if (pScrn->depth > 16) {
+   masks.red = 0xff00;
+   masks.green = 0x00ff;
+   masks.blue = 0xff00;
+   } else {
+   masks.red = 0x1f;
+   masks.green = 0x3f << 5;
+   masks.blue = 0x1f << 11;
+   }
+   break;
default:
masks.red = 0;
masks.green = 0;



Re: mmap: Do not push KERNEL_LOCK() too far

2020-10-03 Thread Mark Kettenis
> Date: Fri, 2 Oct 2020 10:32:27 +0200
> From: Martin Pieuchot 
> 
> On 01/10/20(Thu) 21:44, Mark Kettenis wrote:
> > > Date: Thu, 1 Oct 2020 14:10:56 +0200
> > > From: Martin Pieuchot 
> > > 
> > > While studying a bug report from naddy@ in 2017 when testing guenther@'s
> > > amap/anon locking diff I figured out that we have been too optimistic in
> > > the !MAP_ANON case.
> > > 
> > > The reported panic involves, I'd guess, a race between fd_getfile() and
> > > vref():
> > > 
> > >   panic: vref used where vget required
> > >   db_enter() at db_enter+0x5
> > >   panic() at panic+0x129
> > >   vref(ff03b20d29e8) at vref+0x5d
> > >   uvn_attach(101,ff03a5879dc0) at uvn_attach+0x11d
> > >   uvm_mmapfile(7,ff03a5879dc0,2,1,13,10012) at uvm_mmapfile+0x12c
> > >   sys_mmap(c50,8000225f82a0,1) at sys_mmap+0x604
> > >   syscall() at syscall+0x279
> > >   --- syscall (number 198) ---
> > >   end of kernel
> > > 
> > > Removing the KERNEL_LOCK() from file mapping was out of the scope of this
> > > previous work, so I'd like to go back to a single KERNEL_LOCK/UNLOCK dance
> > > in this code path to remove any false positive.
> > > 
> > > Note that this code is currently always run under KERNEL_LOCK() so this
> > > will only have effect once the syscall will be unlocked.
> > > 
> > > ok?
> > 
> > Hmm, I thought fd_getfile() was fully mpsafe.
> 
> It is to get a reference on `fp'.  However if the current thread
> releases the KERNEL_LOCK() before calling vref(9) it might lose a
> race.

I don't see the race.  The function returns a 'fp' with a reference,
so 'fp' will be valid regardless of whether we hold the kernel lock or
not.  So we should be able to take the kernel lock after the
fd_getfile() call, shouldn't we?

> > But I suppose the kernel lock needs to be grabbed before we start
> > looking at the vnode?
> 
> Yes, or to say it otherwise not released.

So the problem is that while we have an 'fp', its f_data member points
to a vnode that has already been put on the freelist and therefore has
v_usecount set to zero?  How does that happen?

> > Your diff makes the locking a bit convoluted, but I suppose adding a
> > KERNEL_UNLOCK() before every "goto out" is worse?
> 
> I tried to keep the diff as small as possible to not obfuscate the change.
> If we want cleaner code we can move the !ANON case in a different function.

Splitting would be hard because of the "goto is_anon".

> > > Index: uvm/uvm_mmap.c
> > > ===
> > > RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v
> > > retrieving revision 1.161
> > > diff -u -p -r1.161 uvm_mmap.c
> > > --- uvm/uvm_mmap.c4 Mar 2020 21:15:39 -   1.161
> > > +++ uvm/uvm_mmap.c28 Sep 2020 09:48:26 -
> > > @@ -288,8 +288,11 @@ sys_mmap(struct proc *p, void *v, regist
> > >  
> > >   /* check for file mappings (i.e. not anonymous) and verify file. */
> > >   if ((flags & MAP_ANON) == 0) {
> > > - if ((fp = fd_getfile(fdp, fd)) == NULL)
> > > - return (EBADF);
> > > + KERNEL_LOCK();
> > > + if ((fp = fd_getfile(fdp, fd)) == NULL) {
> > > + error = EBADF;
> > > + goto out;
> > > + }
> > >  
> > >   if (fp->f_type != DTYPE_VNODE) {
> > >   error = ENODEV; /* only mmap vnodes! */
> > > @@ -313,6 +316,7 @@ sys_mmap(struct proc *p, void *v, regist
> > >   flags |= MAP_ANON;
> > >   FRELE(fp, p);
> > >   fp = NULL;
> > > + KERNEL_UNLOCK();
> > >   goto is_anon;
> > >   }
> > >  
> > > @@ -362,9 +366,7 @@ sys_mmap(struct proc *p, void *v, regist
> > >* EPERM.
> > >*/
> > >   if (fp->f_flag & FWRITE) {
> > > - KERNEL_LOCK();
> > >   error = VOP_GETATTR(vp, , p->p_ucred, p);
> > > - KERNEL_UNLOCK();
> > >   if (error)
> > >   goto out;
> > >   if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
> > > @@ -390,9 +392,9 @@ sys_mmap(struct proc *p, void *v, regist
> > >   goto out;
> > >   }
> > >   }
> > > - KERNEL_LOCK();
> > >   error = uvm_mmapfile(>p_vmspace->vm_map, , size, prot,
> > >   maxprot, flags, vp, pos, lim_cur(RLIMIT_MEMLOCK), p);
> > > + FRELE(fp, p);
> > >   KERNEL_UNLOCK();
> > >   } else {/* MAP_ANON case */
> > >   if (fd != -1)
> > > @@ -428,7 +430,10 @@ is_anon: /* label for SunOS style /dev/z
> > >   /* remember to add offset */
> > >   *retval = (register_t)(addr + pageoff);
> > >  
> > > + return (error);
> > > +
> > >  out:
> > > + KERNEL_UNLOCK();
> > >   if (fp)
> > >   FRELE(fp, p);
> > >   return (error);
> > > 
> > > 
> 



Re: mmap: Do not push KERNEL_LOCK() too far

2020-10-01 Thread Mark Kettenis
> Date: Thu, 1 Oct 2020 14:10:56 +0200
> From: Martin Pieuchot 
> 
> While studying a bug report from naddy@ in 2017 when testing guenther@'s
> amap/anon locking diff I figured out that we have been too optimistic in
> the !MAP_ANON case.
> 
> The reported panic involves, I'd guess, a race between fd_getfile() and
> vref():
> 
>   panic: vref used where vget required
>   db_enter() at db_enter+0x5
>   panic() at panic+0x129
>   vref(ff03b20d29e8) at vref+0x5d
>   uvn_attach(101,ff03a5879dc0) at uvn_attach+0x11d
>   uvm_mmapfile(7,ff03a5879dc0,2,1,13,10012) at uvm_mmapfile+0x12c
>   sys_mmap(c50,8000225f82a0,1) at sys_mmap+0x604
>   syscall() at syscall+0x279
>   --- syscall (number 198) ---
>   end of kernel
> 
> Removing the KERNEL_LOCK() from file mapping was out of the scope of this
> previous work, so I'd like to go back to a single KERNEL_LOCK/UNLOCK dance
> in this code path to remove any false positive.
> 
> Note that this code is currently always run under KERNEL_LOCK() so this
> will only have effect once the syscall will be unlocked.
> 
> ok?

Hmm, I thought fd_getfile() was fully mpsafe.

But I suppose the kernel lock needs to be grabbed before we start
looking at the vnode?

Your diff makes the locking a bit convoluted, but I suppose adding a
KERNEL_UNLOCK() before every "goto out" is worse?


> Index: uvm/uvm_mmap.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v
> retrieving revision 1.161
> diff -u -p -r1.161 uvm_mmap.c
> --- uvm/uvm_mmap.c4 Mar 2020 21:15:39 -   1.161
> +++ uvm/uvm_mmap.c28 Sep 2020 09:48:26 -
> @@ -288,8 +288,11 @@ sys_mmap(struct proc *p, void *v, regist
>  
>   /* check for file mappings (i.e. not anonymous) and verify file. */
>   if ((flags & MAP_ANON) == 0) {
> - if ((fp = fd_getfile(fdp, fd)) == NULL)
> - return (EBADF);
> + KERNEL_LOCK();
> + if ((fp = fd_getfile(fdp, fd)) == NULL) {
> + error = EBADF;
> + goto out;
> + }
>  
>   if (fp->f_type != DTYPE_VNODE) {
>   error = ENODEV; /* only mmap vnodes! */
> @@ -313,6 +316,7 @@ sys_mmap(struct proc *p, void *v, regist
>   flags |= MAP_ANON;
>   FRELE(fp, p);
>   fp = NULL;
> + KERNEL_UNLOCK();
>   goto is_anon;
>   }
>  
> @@ -362,9 +366,7 @@ sys_mmap(struct proc *p, void *v, regist
>* EPERM.
>*/
>   if (fp->f_flag & FWRITE) {
> - KERNEL_LOCK();
>   error = VOP_GETATTR(vp, , p->p_ucred, p);
> - KERNEL_UNLOCK();
>   if (error)
>   goto out;
>   if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
> @@ -390,9 +392,9 @@ sys_mmap(struct proc *p, void *v, regist
>   goto out;
>   }
>   }
> - KERNEL_LOCK();
>   error = uvm_mmapfile(>p_vmspace->vm_map, , size, prot,
>   maxprot, flags, vp, pos, lim_cur(RLIMIT_MEMLOCK), p);
> + FRELE(fp, p);
>   KERNEL_UNLOCK();
>   } else {/* MAP_ANON case */
>   if (fd != -1)
> @@ -428,7 +430,10 @@ is_anon: /* label for SunOS style /dev/z
>   /* remember to add offset */
>   *retval = (register_t)(addr + pageoff);
>  
> + return (error);
> +
>  out:
> + KERNEL_UNLOCK();
>   if (fp)
>   FRELE(fp, p);
>   return (error);
> 
> 



dt(4) tweaks for powerpc64

2020-09-25 Thread Mark Kettenis
This adds the magic numbers for powerpc64 to cut away the noise in
stack traces.

ok?


Index: dev/dt/dt_dev.c
===
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
retrieving revision 1.9
diff -u -p -r1.9 dt_dev.c
--- dev/dt/dt_dev.c 13 Aug 2020 11:28:31 -  1.9
+++ dev/dt/dt_dev.c 25 Sep 2020 17:40:58 -
@@ -56,6 +56,9 @@
 #if defined(__amd64__)
 #define DT_FA_PROFILE  5
 #define DT_FA_STATIC   2
+#elif defined(__powerpc64__)
+#define DT_FA_PROFILE  6
+#define DT_FA_STATIC   2
 #elif defined(__sparc64__)
 #define DT_FA_PROFILE  5
 #define DT_FA_STATIC   1



Re: amap: panic -> KASSERT

2020-09-25 Thread Mark Kettenis
> From: Scott Cheloha 
> Date: Fri, 25 Sep 2020 09:48:20 -0400
> 
> > On Sep 24, 2020, at 07:43, Theo de Raadt  wrote:
> > 
> > Mark Kettenis  wrote:
> > 
> >>> Date: Thu, 24 Sep 2020 11:53:59 +0200
> >>> From: Martin Pieuchot 
> >>> 
> >>> Convert various "if (x) panic()" idioms into "KASSERT(!x)".  The panic
> >>> message isn't helping for such sanity checks and this help reducing the
> >>> diff with NetBSD.
> >>> 
> >>> ok?
> >> 
> >> Yes, the KASSERTs are probably more useful for debugging.  The
> >> downside is that we lose the checks in RAMDISK kernels.  The upside of
> >> that is that it makes the kernel smaller.
> >> 
> >> ok kettenis@
> > 
> > That's the complete assessment of the situation, and on the scale I'm
> > happy with the diff.
> > 
> > ok deraadt
> 
> Is there any scenario where
> 
> if (condition)
> panic();
> 
> is preferable to
> 
> KASSERT(condition);
> 
> outside of function calls with side effects?

Apart from what deraadt@ already said, KASSERT() really should only be
used for "this really shouldn't happen" cases.  Whereas the panic can
be used for cases that can happen but we don't know how to (or don't
want to) handle.

And as I implied, KASSERTs are no-ops in RAMDISK kernels so they don't
stop things going horribly off the rails in that context, whereas a
panic would still happen.



Re: Call uvm_grow() on armv7

2020-09-25 Thread Mark Kettenis
> Date: Fri, 25 Sep 2020 15:18:40 +0200
> From: Peter Hessler 
> 
> On 2020 Sep 25 (Fri) at 14:51:01 +0200 (+0200), Mark Kettenis wrote:
> :> Date: Fri, 25 Sep 2020 14:38:23 +0200
> :> From: Peter Hessler 
> :> 
> :> After Mark noticed that arm64 didn't have it, I checked armv7 and it
> :> also doesn't have it.
> :> 
> :> Successfully tested on a Tinker-RK3288
> :> 
> :> OK?
> :> 
> :> 
> :> Index: sys/arch/arm/arm/fault.c
> :> ===
> :> RCS file: /home/cvs/openbsd/src/sys/arch/arm/arm/fault.c,v
> :> retrieving revision 1.41
> :> diff -u -p -u -p -r1.41 fault.c
> :> --- sys/arch/arm/arm/fault.c   14 Sep 2020 18:23:32 -  1.41
> :> +++ sys/arch/arm/arm/fault.c   25 Sep 2020 06:21:48 -
> :> @@ -331,6 +331,8 @@ data_abort_handler(trapframe_t *tf)
> :>pcb->pcb_onfault = NULL;
> :>KERNEL_LOCK();
> :>error = uvm_fault(map, va, 0, ftype);
> :> +  if (error == 0)
> :
> :You need a map != kernel_map check here as this trap can be called for
> :both kernel faults and userland faults.
> :
> :> +  uvm_grow(p, va);
> :>KERNEL_UNLOCK();
> :>pcb->pcb_onfault = onfault;
> :>  
> :> @@ -588,6 +590,8 @@ prefetch_abort_handler(trapframe_t *tf)
> :>  
> :>KERNEL_LOCK();
> :>error = uvm_fault(map, va, 0, PROT_READ | PROT_EXEC);
> :> +  if (error == 0)
> :
> :But not here since this one always has map != kernel_map.
> :
> :> +  uvm_grow(p, va);
> :>KERNEL_UNLOCK();
> :>if (__predict_true(error == 0))
> :>goto out;
> :> 
> 
> Thanks, fixed.

ok kettenis@

> Index: sys/arch/arm/arm/fault.c
> ===
> RCS file: /home/cvs/openbsd/src/sys/arch/arm/arm/fault.c,v
> retrieving revision 1.41
> diff -u -p -u -p -r1.41 fault.c
> --- sys/arch/arm/arm/fault.c  14 Sep 2020 18:23:32 -  1.41
> +++ sys/arch/arm/arm/fault.c  25 Sep 2020 13:16:55 -
> @@ -331,6 +331,8 @@ data_abort_handler(trapframe_t *tf)
>   pcb->pcb_onfault = NULL;
>   KERNEL_LOCK();
>   error = uvm_fault(map, va, 0, ftype);
> + if (error == 0 && map != kernel_map)
> + uvm_grow(p, va);
>   KERNEL_UNLOCK();
>   pcb->pcb_onfault = onfault;
>  
> @@ -588,6 +590,8 @@ prefetch_abort_handler(trapframe_t *tf)
>  
>   KERNEL_LOCK();
>   error = uvm_fault(map, va, 0, PROT_READ | PROT_EXEC);
> + if (error == 0)
> + uvm_grow(p, va);
>   KERNEL_UNLOCK();
>   if (__predict_true(error == 0))
>   goto out;
> 
> 
> 
> 
> :> 
> :> 
> :> On 2020 Sep 24 (Thu) at 23:16:08 +0200 (+0200), Mark Kettenis wrote:
> :> :The call is missing from the trap handler, probably because I was
> :> :looking at arm64 where it is missing as well.  The result is that the
> :> :stack size accounting will be wrong.
> :> :
> :> :In the diff below I only added the call to the "data" trap.  That
> :> :means that an "instruction" trap will not run the accounting code.  Is
> :> :that correct?  The uvm_fault() call should never return success in
> :> :that case unless the stack has been mapped executable...
> :> :
> 
> -- 
> Millihelen, adj:
>   The amount of beauty required to launch one ship.
> 



Re: Call uvm_grow() on armv7

2020-09-25 Thread Mark Kettenis
> Date: Fri, 25 Sep 2020 14:38:23 +0200
> From: Peter Hessler 
> 
> After Mark noticed that arm64 didn't have it, I checked armv7 and it
> also doesn't have it.
> 
> Successfully tested on a Tinker-RK3288
> 
> OK?
> 
> 
> Index: sys/arch/arm/arm/fault.c
> ===
> RCS file: /home/cvs/openbsd/src/sys/arch/arm/arm/fault.c,v
> retrieving revision 1.41
> diff -u -p -u -p -r1.41 fault.c
> --- sys/arch/arm/arm/fault.c  14 Sep 2020 18:23:32 -  1.41
> +++ sys/arch/arm/arm/fault.c  25 Sep 2020 06:21:48 -
> @@ -331,6 +331,8 @@ data_abort_handler(trapframe_t *tf)
>   pcb->pcb_onfault = NULL;
>   KERNEL_LOCK();
>   error = uvm_fault(map, va, 0, ftype);
> + if (error == 0)

You need a map != kernel_map check here as this trap can be called for
both kernel faults and userland faults.

> + uvm_grow(p, va);
>   KERNEL_UNLOCK();
>   pcb->pcb_onfault = onfault;
>  
> @@ -588,6 +590,8 @@ prefetch_abort_handler(trapframe_t *tf)
>  
>   KERNEL_LOCK();
>   error = uvm_fault(map, va, 0, PROT_READ | PROT_EXEC);
> + if (error == 0)

But not here since this one always has map != kernel_map.

> + uvm_grow(p, va);
>   KERNEL_UNLOCK();
>   if (__predict_true(error == 0))
>   goto out;
> 
> 
> 
> On 2020 Sep 24 (Thu) at 23:16:08 +0200 (+0200), Mark Kettenis wrote:
> :The call is missing from the trap handler, probably because I was
> :looking at arm64 where it is missing as well.  The result is that the
> :stack size accounting will be wrong.
> :
> :In the diff below I only added the call to the "data" trap.  That
> :means that an "instruction" trap will not run the accounting code.  Is
> :that correct?  The uvm_fault() call should never return success in
> :that case unless the stack has been mapped executable...
> :
> 
> 
> -- 
> Nature is by and large to be found out of doors, a location where, it
> cannot be argued, there are never enough comfortable chairs.
>   -- Fran Leibowitz
> 



Re: Call uvm_grow() on powerpc64

2020-09-24 Thread Mark Kettenis
> From: Theo de Raadt 
> Date: Thu, 24 Sep 2020 15:27:06 -0600 (MDT)
> 
> >The call is missing from the trap handler, probably because I was
> >looking at arm64 where it is missing as well.  The result is that the
> >stack size accounting will be wrong.
> 
> Nice find.
> 
> >In the diff below I only added the call to the "data" trap.  That
> >means that an "instruction" trap will not run the accounting code.  Is
> >that correct?  The uvm_fault() call should never return success in
> >that case unless the stack has been mapped executable...
> 
> I think both should have it.  munmap and mprotect exist, and
> people can do strange things.

Which would be this diff.  ok?


Index: arch/powerpc64/powerpc64/trap.c
===
RCS file: /cvs/src/sys/arch/powerpc64/powerpc64/trap.c,v
retrieving revision 1.39
diff -u -p -r1.39 trap.c
--- arch/powerpc64/powerpc64/trap.c 24 Sep 2020 20:22:15 -  1.39
+++ arch/powerpc64/powerpc64/trap.c 24 Sep 2020 21:36:27 -
@@ -181,6 +181,8 @@ trap(struct trapframe *frame)
ftype = PROT_READ;
KERNEL_LOCK();
error = uvm_fault(map, trunc_page(va), 0, ftype);
+   if (error == 0)
+   uvm_grow(p, trunc_page(va));
KERNEL_UNLOCK();
if (error) {
 #ifdef TRAP_DEBUG
@@ -225,6 +227,8 @@ trap(struct trapframe *frame)
ftype = PROT_READ | PROT_EXEC;
KERNEL_LOCK();
error = uvm_fault(map, trunc_page(va), 0, ftype);
+   if (error == 0)
+   uvm_grow(p, trunc_page(va));
KERNEL_UNLOCK();
if (error) {
 #ifdef TRAP_DEBUG



Call uvm_grow() on arm64

2020-09-24 Thread Mark Kettenis
As noted in the "Call uvm_grow() on powerpc64" mail, arm64 doesn't
call it either.  Diff below fixes that.

ok?


Index: arch/arm64/arm64/trap.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/trap.c,v
retrieving revision 1.30
diff -u -p -r1.30 trap.c
--- arch/arm64/arm64/trap.c 14 Sep 2020 19:44:01 -  1.30
+++ arch/arm64/arm64/trap.c 24 Sep 2020 21:26:15 -
@@ -144,6 +144,8 @@ data_abort(struct trapframe *frame, uint
if (!pmap_fault_fixup(map->pmap, va, access_type, 1)) {
KERNEL_LOCK();
error = uvm_fault(map, va, ftype, access_type);
+   if (error == 0)
+   uvm_grow(p, va);
KERNEL_UNLOCK();
}
} else {



Call uvm_grow() on powerpc64

2020-09-24 Thread Mark Kettenis
The call is missing from the trap handler, probably because I was
looking at arm64 where it is missing as well.  The result is that the
stack size accounting will be wrong.

In the diff below I only added the call to the "data" trap.  That
means that an "instruction" trap will not run the accounting code.  Is
that correct?  The uvm_fault() call should never return success in
that case unless the stack has been mapped executable...


Index: arch/powerpc64/powerpc64/trap.c
===
RCS file: /cvs/src/sys/arch/powerpc64/powerpc64/trap.c,v
retrieving revision 1.39
diff -u -p -r1.39 trap.c
--- arch/powerpc64/powerpc64/trap.c 24 Sep 2020 20:22:15 -  1.39
+++ arch/powerpc64/powerpc64/trap.c 24 Sep 2020 21:11:08 -
@@ -181,6 +181,8 @@ trap(struct trapframe *frame)
ftype = PROT_READ;
KERNEL_LOCK();
error = uvm_fault(map, trunc_page(va), 0, ftype);
+   if (error == 0)
+   uvm_grow(p, trunc_page(va));
KERNEL_UNLOCK();
if (error) {
 #ifdef TRAP_DEBUG



Push back kernel lock a bit in amd64 pageflttrap()

2020-09-24 Thread Mark Kettenis
This avoids taking the kernel lock when ci_inatomic is set.  This
might speed up inteldrm(4) a bit.  Since uvm_grow() still needs the
kernel lock, some reorganization of the code is necessary.

I'm not sure this actually has an impact.  If we end up here with
ci_inatomic set we're going to return EFAULT and take a slow path
anyway.  So maybe it is better to leave this until we make uvm_grow()
mpsafe?


Index: arch/amd64/amd64/trap.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v
retrieving revision 1.81
diff -u -p -r1.81 trap.c
--- arch/amd64/amd64/trap.c 14 Sep 2020 12:51:28 -  1.81
+++ arch/amd64/amd64/trap.c 24 Sep 2020 14:21:24 -
@@ -173,11 +173,10 @@ pageflttrap(struct trapframe *frame, uin
pcb = >p_addr->u_pcb;
va = trunc_page((vaddr_t)cr2);
 
-   KERNEL_LOCK();
-
if (!usermode) {
/* This will only trigger if SMEP is enabled */
if (cr2 <= VM_MAXUSER_ADDRESS && frame->tf_err & PGEX_I) {
+   KERNEL_LOCK();
fault("attempt to execute user address %p "
"in supervisor mode", (void *)cr2);
/* retain kernel lock */
@@ -186,6 +185,7 @@ pageflttrap(struct trapframe *frame, uin
/* This will only trigger if SMAP is enabled */
if (pcb->pcb_onfault == NULL && cr2 <= VM_MAXUSER_ADDRESS &&
frame->tf_err & PGEX_P) {
+   KERNEL_LOCK();
fault("attempt to access user address %p "
"in supervisor mode", (void *)cr2);
/* retain kernel lock */
@@ -216,28 +216,29 @@ pageflttrap(struct trapframe *frame, uin
caddr_t onfault = pcb->pcb_onfault;
 
pcb->pcb_onfault = NULL;
+   KERNEL_LOCK();
error = uvm_fault(map, va, frame->tf_err & PGEX_P ?
VM_FAULT_PROTECT : VM_FAULT_INVALID, ftype);
+   if (error == 0 && map != kernel_map)
+   uvm_grow(p, va);
+   KERNEL_UNLOCK();
pcb->pcb_onfault = onfault;
} else
error = EFAULT;
 
-   if (error == 0) {
-   if (map != kernel_map)
-   uvm_grow(p, va);
-   } else if (!usermode) {
+   if (error && !usermode) {
if (pcb->pcb_onfault != 0) {
-   KERNEL_UNLOCK();
frame->tf_rip = (u_int64_t)pcb->pcb_onfault;
return 1;
} else {
/* bad memory access in the kernel */
+   KERNEL_LOCK();
fault("uvm_fault(%p, 0x%llx, 0, %d) -> %x",
map, cr2, ftype, error);
/* retain kernel lock */
return 0;
}
-   } else {
+   } else if (error) {
union sigval sv;
int signal, sicode;
 
@@ -260,8 +261,6 @@ pageflttrap(struct trapframe *frame, uin
sv.sival_ptr = (void *)cr2;
trapsignal(p, signal, T_PAGEFLT, sicode, sv);
}
-
-   KERNEL_UNLOCK();
 
return 1;
 }



Re: amap: panic -> KASSERT

2020-09-24 Thread Mark Kettenis
> Date: Thu, 24 Sep 2020 11:53:59 +0200
> From: Martin Pieuchot 
> 
> Convert various "if (x) panic()" idioms into "KASSERT(!x)".  The panic
> message isn't helping for such sanity checks and this help reducing the
> diff with NetBSD.
> 
> ok?

Yes, the KASSERTs are probably more useful for debugging.  The
downside is that we lose the checks in RAMDISK kernels.  The upside of
that is that it makes the kernel smaller.

ok kettenis@

> Index: uvm/uvm_amap.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
> retrieving revision 1.83
> diff -u -p -r1.83 uvm_amap.c
> --- uvm/uvm_amap.c22 Sep 2020 14:31:08 -  1.83
> +++ uvm/uvm_amap.c24 Sep 2020 09:47:54 -
> @@ -1019,9 +1019,7 @@ amap_lookup(struct vm_aref *aref, vaddr_
>  
>   AMAP_B2SLOT(slot, offset);
>   slot += aref->ar_pageoff;
> -
> - if (slot >= amap->am_nslot)
> - panic("amap_lookup: offset out of range");
> + KASSERT(slot < amap->am_nslot);
>  
>   chunk = amap_chunk_get(amap, slot, 0, PR_NOWAIT);
>   if (chunk == NULL)
> @@ -1046,8 +1044,7 @@ amap_lookups(struct vm_aref *aref, vaddr
>   AMAP_B2SLOT(slot, offset);
>   slot += aref->ar_pageoff;
>  
> - if ((slot + (npages - 1)) >= amap->am_nslot)
> - panic("amap_lookups: offset out of range");
> + KASSERT((slot + (npages - 1)) < amap->am_nslot);
>  
>   for (i = 0, lcv = slot; lcv < slot + npages; i += n, lcv += n) {
>   n = UVM_AMAP_CHUNK - UVM_AMAP_SLOTIDX(lcv);
> @@ -1078,9 +1075,7 @@ amap_populate(struct vm_aref *aref, vadd
>  
>   AMAP_B2SLOT(slot, offset);
>   slot += aref->ar_pageoff;
> -
> - if (slot >= amap->am_nslot)
> - panic("amap_populate: offset out of range");
> + KASSERT(slot < amap->am_nslot);
>  
>   chunk = amap_chunk_get(amap, slot, 1, PR_WAITOK);
>   KASSERT(chunk != NULL);
> @@ -1101,9 +1096,8 @@ amap_add(struct vm_aref *aref, vaddr_t o
>  
>   AMAP_B2SLOT(slot, offset);
>   slot += aref->ar_pageoff;
> + KASSERT(slot < amap->am_nslot);
>  
> - if (slot >= amap->am_nslot)
> - panic("amap_add: offset out of range");
>   chunk = amap_chunk_get(amap, slot, 1, PR_NOWAIT);
>   if (chunk == NULL)
>   return 1;
> @@ -1144,9 +1138,7 @@ amap_unadd(struct vm_aref *aref, vaddr_t
>  
>   AMAP_B2SLOT(slot, offset);
>   slot += aref->ar_pageoff;
> -
> - if (slot >= amap->am_nslot)
> - panic("amap_unadd: offset out of range");
> + KASSERT(slot < amap->am_nslot);
>   chunk = amap_chunk_get(amap, slot, 0, PR_NOWAIT);
>   if (chunk == NULL)
>   panic("amap_unadd: chunk for slot %d not present", slot);
> 
> 



Re: uvm: __inline -> inline

2020-09-22 Thread Mark Kettenis
> Date: Tue, 22 Sep 2020 10:37:40 +0200
> From: Martin Pieuchot 
> 
> On 22/09/20(Tue) 10:20, Mark Kettenis wrote:
> > > Date: Tue, 22 Sep 2020 09:15:00 +0200
> > > From: Martin Pieuchot 
> > > 
> > > Spell inline correctly, also reduce the diff with NetBSD for uvm_amap.c
> > > and uvm_fault.c.
> > > 
> > > ok?
> > 
> > In general, yes.  This might interfere with the diff that guenther@
> > did a while ago to lock amaps and unlock more of uvm.  Now that the
> > uvm_map_inentry() mystery is (largely) solved, it may be worth looking
> > into that diff again.  Or is that what you're doing right now?
> 
> That's what I am doing right now without the knowledge of guenther@'s
> prior work, could you share it?

Can't immediately find it; maybe you can ask him directly.



Re: uvm: __inline -> inline

2020-09-22 Thread Mark Kettenis
> Date: Tue, 22 Sep 2020 09:15:00 +0200
> From: Martin Pieuchot 
> 
> Spell inline correctly, also reduce the diff with NetBSD for uvm_amap.c
> and uvm_fault.c.
> 
> ok?

In general, yes.  This might interfere with the diff that guenther@
did a while ago to lock amaps and unlock more of uvm.  Now that the
uvm_map_inentry() mystery is (largely) solved, it may be worth looking
into that diff again.  Or is that what you're doing right now?

I'm not aware of any other large outstanding diffs in the uvm area.


> Index: uvm/uvm_addr.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_addr.c,v
> retrieving revision 1.28
> diff -u -p -r1.28 uvm_addr.c
> --- uvm/uvm_addr.c13 Sep 2020 10:05:25 -  1.28
> +++ uvm/uvm_addr.c22 Sep 2020 07:12:10 -
> @@ -186,7 +186,7 @@ uvm_addr_entrybyspace(struct uaddr_free_
>  }
>  #endif /* !SMALL_KERNEL */
>  
> -static __inline vaddr_t
> +static inline vaddr_t
>  uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
>  {
>   vaddr_t adjusted;
> @@ -201,7 +201,7 @@ uvm_addr_align_forward(vaddr_t addr, vad
>   return (adjusted < addr ? adjusted + align : adjusted);
>  }
>  
> -static __inline vaddr_t
> +static inline vaddr_t
>  uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
>  {
>   vaddr_t adjusted;
> Index: uvm/uvm_amap.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
> retrieving revision 1.82
> diff -u -p -r1.82 uvm_amap.c
> --- uvm/uvm_amap.c4 Jan 2020 16:17:29 -   1.82
> +++ uvm/uvm_amap.c22 Sep 2020 07:07:45 -
> @@ -63,20 +63,20 @@ static char amap_small_pool_names[UVM_AM
>   */
>  
>  static struct vm_amap *amap_alloc1(int, int, int);
> -static __inline void amap_list_insert(struct vm_amap *);
> -static __inline void amap_list_remove(struct vm_amap *);   
> +static inline void amap_list_insert(struct vm_amap *);
> +static inline void amap_list_remove(struct vm_amap *);   
>  
>  struct vm_amap_chunk *amap_chunk_get(struct vm_amap *, int, int, int);
>  void amap_chunk_free(struct vm_amap *, struct vm_amap_chunk *);
>  void amap_wiperange_chunk(struct vm_amap *, struct vm_amap_chunk *, int, 
> int);
>  
> -static __inline void
> +static inline void
>  amap_list_insert(struct vm_amap *amap)
>  {
>   LIST_INSERT_HEAD(_list, amap, am_list);
>  }
>  
> -static __inline void
> +static inline void
>  amap_list_remove(struct vm_amap *amap)
>  { 
>   LIST_REMOVE(amap, am_list);
> @@ -190,13 +190,10 @@ amap_chunk_free(struct vm_amap *amap, st
>   * here are some in-line functions to help us.
>   */
>  
> -static __inline void pp_getreflen(int *, int, int *, int *);
> -static __inline void pp_setreflen(int *, int, int, int);
> -
>  /*
>   * pp_getreflen: get the reference and length for a specific offset
>   */
> -static __inline void
> +static inline void
>  pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
>  {
>  
> @@ -212,7 +209,7 @@ pp_getreflen(int *ppref, int offset, int
>  /*
>   * pp_setreflen: set the reference and length for a specific offset
>   */
> -static __inline void
> +static inline void
>  pp_setreflen(int *ppref, int offset, int ref, int len)
>  {
>   if (len == 1) {
> Index: uvm/uvm_aobj.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.86
> diff -u -p -r1.86 uvm_aobj.c
> --- uvm/uvm_aobj.c18 Jul 2019 23:47:33 -  1.86
> +++ uvm/uvm_aobj.c22 Sep 2020 07:11:50 -
> @@ -256,7 +256,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob
>  /*
>   * uao_find_swslot: find the swap slot number for an aobj/pageidx
>   */
> -__inline static int
> +inline static int
>  uao_find_swslot(struct uvm_aobj *aobj, int pageidx)
>  {
>  
> Index: uvm/uvm_fault.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.98
> diff -u -p -r1.98 uvm_fault.c
> --- uvm/uvm_fault.c   12 Sep 2020 17:08:49 -  1.98
> +++ uvm/uvm_fault.c   22 Sep 2020 07:07:59 -
> @@ -159,7 +159,7 @@ static struct uvm_advice uvmadvice[MADV_
>   * private prototypes
>   */
>  static void uvmfault_amapcopy(struct uvm_faultinfo *);
> -static __inline void uvmfault_anonflush(struct vm_anon **, int);
> +static inline void uvmfault_anonflush(struct vm_anon **, int);
>  void uvmfault_unlockmaps(struct uvm_faultinfo *, boolean_t);
>  void uvmfault_update_stats(struct uvm_faultinfo *);
>  
> @@ -171,7 +171,7 @@ void  uvmfault_update_stats(struct uvm_fa
>   *
>   * => does not have to deactivate page if it is busy
>   */
> -static __inline void
> +static inline void
>  uvmfault_anonflush(struct vm_anon **anons, int n)
>  {
>   int lcv;
> Index: uvm/uvm_map.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.267
> diff -u 

Re: pmap_enter(9) doesn't sleep

2020-09-22 Thread Mark Kettenis
> Date: Tue, 22 Sep 2020 09:51:03 +0200
> From: Martin Pieuchot 
> 
> Allocations in the various pmap_enter(9) are done with uvm_pagealloc(9),
> which sets the UVM_PLA_NOWAIT flag, and/or with pool_get(9) w/ PR_NOWAIT.
> 
> So the comment below seems outdated to me, ok to kill it?

This matches my understanding.  ok kettenis@

> Index: uvm/uvm_fault.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.98
> diff -u -p -r1.98 uvm_fault.c
> --- uvm/uvm_fault.c   12 Sep 2020 17:08:49 -  1.98
> +++ uvm/uvm_fault.c   22 Sep 2020 07:46:43 -
> @@ -702,13 +702,6 @@ ReFault:
>   pmap_update(ufi.orig_map->pmap);
>  
>   /* (shadowed == TRUE) if there is an anon at the faulting address */
> - /*
> -  * note that if we are really short of RAM we could sleep in the above
> -  * call to pmap_enter.   bad?
> -  *
> -  * XXX Actually, that is bad; pmap_enter() should just fail in that
> -  * XXX case.  --thorpej
> -  */
>   /*
>* if the desired page is not shadowed by the amap and we have a
>* backing object, then we check to see if the backing object would
> 
> 



Re: curproc vs MP vs locking

2020-09-15 Thread Mark Kettenis
> Date: Tue, 15 Sep 2020 12:34:07 +0200
> From: Martin Pieuchot 
> 
> Many functions in the kernel take a "struct proc *" as argument.  When
> reviewing diffs or reading the signature of such functions it is not
> clear if this pointer can be any thread or if it is, like in many cases,
> pointing to `curproc'.
> 
> This distinction matters when it comes to reading/writing members of
> this "struct proc" and that's why a growing number of functions start
> with the following idiom:
> 
>   KASSERT(p == curproc);
> 
> This is verbose and redundant, so I suggested to always use `curproc'
> and stop passing a "struct proc *" as argument when a function isn't
> meant to modify any thread.  claudio@ raised a concern of performance
> claiming that `curproc' isn't always cheap.  Is it still true?  Does
> the KASSERT()s make us pay the cost anyhow?

Right, because our kernel has DIAGNOSTIC enabled.

> If that's the case can we adopt a convention to help review functions
> that take a "struct proc *" but only mean `curproc'?  What about naming
> this parameter `curp' instead of `p'?

That'll result in quite a bit of churn.  I'd really like to avoid
doing that.



Re: PATCH: Add ACPI IVHD_EXT structure to acpireg.h

2020-09-15 Thread Mark Kettenis
> Date: Tue, 15 Sep 2020 01:37:33 -0500
> From: Jordan Hargrave 
> 
> This patch adds a couple of entries for AMD IOMMU structure
> definitions in ACPI

ok kettenis@

> Index: acpireg.h
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpireg.h,v
> retrieving revision 1.45
> diff -u -p -r1.45 acpireg.h
> --- acpireg.h 28 Aug 2019 22:39:09 -  1.45
> +++ acpireg.h 15 Sep 2020 06:29:50 -
> @@ -623,6 +623,9 @@ struct acpi_ivmd {
>  struct acpi_ivhd {
>   uint8_t type;
>   uint8_t flags;
> +#define IVHD_PPRSUP  (1L << 7)
> +#define IVHD_PREFSUP (1L << 6)
> +#define IVHD_COHERENT(1L << 5)
>  #define IVHD_IOTLB   (1L << 4)
>  #define IVHD_ISOC(1L << 3)
>  #define IVHD_RESPASSPW   (1L << 2)
> @@ -638,13 +641,28 @@ struct acpi_ivhd {
>  #define IVHD_UNITID_MASK 0x1F
>  #define IVHD_MSINUM_SHIFT0
>  #define IVHD_MSINUM_MASK 0x1F
> - uint32_treserved;
> + uint32_tfeature;
> +} __packed;
> +
> +struct acpi_ivhd_ext {
> + uint8_t type;
> + uint8_t flags;
> + uint16_tlength;
> + uint16_tdevid;
> + uint16_tcap;
> + uint64_taddress;
> + uint16_tsegment;
> + uint16_tinfo;
> + uint32_tattrib;
> + uint64_tefr;
> + uint8_t reserved[8];
>  } __packed;
>  
>  union acpi_ivrs_entry {
>   struct {
>   uint8_t type;
>  #define IVRS_IVHD0x10
> +#define IVRS_IVHD_EXT0x11
>  #define IVRS_IVMD_ALL0x20
>  #define IVRS_IVMD_SPECIFIED  0x21
>  #define IVRS_IVMD_RANGE  0x22
> @@ -652,6 +670,7 @@ union acpi_ivrs_entry {
>   uint16_tlength;
>   } __packed;
>   struct acpi_ivhdivhd;
> + struct acpi_ivhd_extivhd_ext;
>   struct acpi_ivmdivmd;
>  } __packed;
>  
> 
> 



Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

2020-09-14 Thread Mark Kettenis
> Date: Tue, 8 Sep 2020 21:43:39 -0500
> From: Jordan Hargrave 
> 
> Made changes for the iommu_readq -> iommu_read_8 and also now
> dynamically allocate the hwdte for AMD IOMMU.

Some more bits...

> On Fri, Sep 04, 2020 at 09:17:18PM +0200, Mark Kettenis wrote:
> > > Date: Fri, 4 Sep 2020 00:50:44 -0500
> > > From: Jordan Hargrave 
> > 
> > A few hints below...
> > 
> > > > > +
> > > > > +/* Page Table Entry per domain */
> > > > > +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> > > > > +
> > > > > +/* Alias mapping */
> > > > > +#define SID_INVALID 0x8000L
> > > > > +static uint32_t sid_flag[65536];
> > > > 
> > > > Can we avoid having these large arrays, or at least allocate them
> > > > dynamically?  That would also avoid the explicit alignment which is
> > > > somewhat nasty since it affects the entire kernel.
> > > 
> > > OK. But the hwdte does need the 2M area to be all contiguous but it is not
> > > needed for DMAR/Intel.  You *can* have up to 8 different device table 
> > > entries
> > > though to split up the area.
> > 
> > The appropriate interface to use in this context is
> > bus_dmamem_alloc(9).  You can specify alignment, and if you set nsegs
> > > to 1, you will get memory that is physically contiguous.
> > 
> > To map the memory into kernel address space you'll need create a map
> > using bus_dmamap_create(9) and map it using bus_dmamem_map(9).  Then
> > instead of using pmap_extract(9) you use bus_dmamap_load_raw(9) which
> > then populates the physical addresses.
> > 
> > Many of the drivers written by dlg@ define convenience functions to do
> > all these steps, although interestingly enough he tends to use
> > bus_dmamap_load(9) instead of bus_dmamap_load_raw(9) which is
> > sub-optimal.
> > 
> > > > > +
> > > > > +struct domain_dev {
> > > > > + int sid;
> > > > > + int sec;
> > > > > + int sub;
> > > > > + TAILQ_ENTRY(domain_dev) link;
> > > > > +};
> > > > > +
> > > > > +struct domain {
> > > > > + struct iommu_softc  *iommu;
> > > > > + int did;
> > > > > + int gaw;
> > > > > + struct pte_entry*pte;
> > > > > + paddr_t ptep;
> > > > > + struct bus_dma_tag  dmat;
> > > > > + int flag;
> > > > > +
> > > > > + struct mutexexlck;
> > > > > + charexname[32];
> > > > > + struct extent   *iovamap;
> > > > > + TAILQ_HEAD(,domain_dev) devices;
> > > > > + TAILQ_ENTRY(domain) link;
> > > > > +};
> > > > > +
> > > > > +#define DOM_DEBUG 0x1
> > > > > +#define DOM_NOMAP 0x2
> > > > > +
> > > > > +struct dmar_devlist {
> > > > > + int type;
> > > > > + int bus;
> > > > > + int ndp;
> > > > > + struct acpidmar_devpath *dp;
> > > > > + TAILQ_ENTRY(dmar_devlist)   link;
> > > > > +};
> > > > > +
> > > > > +TAILQ_HEAD(devlist_head, dmar_devlist);
> > > > > +
> > > > > +struct ivhd_devlist {
> > > > > + int start_id;
> > > > > + int end_id;
> > > > > + int cfg;
> > > > > + TAILQ_ENTRY(ivhd_devlist)   link;
> > > > > +};
> > > > > +
> > > > > +struct rmrr_softc {
> > > > > + TAILQ_ENTRY(rmrr_softc) link;
> > > > > + struct devlist_head devices;
> > > > > + int segment;
> > > > > + uint64_tstart;
> > > > > + uint64_tend;
> > > > > +};
> > > > > +
> > > > > +struct atsr_softc {
> > > > > + TAILQ_ENTRY(atsr_softc) link;
> > > > > + struct devlist_head devices;
> &

Re: Document the WSDISPLAYIO_GETSCREENTYPE ioctl in wsdisplay.4

2020-09-14 Thread Mark Kettenis
> Date: Mon, 14 Sep 2020 12:04:37 +0200
> From: Frederic Cambus 
> 
> Hi tech@,
> 
> Here is a diff to document the WSDISPLAYIO_GETSCREENTYPE ioctl in
> wsdisplay.4.
> 
> The wsdisplay_screentype structure definition can be found in
> <dev/wscons/wsconsio.h>.
> 
> Comments? OK?

Sure, go for it.

> Index: wsdisplay.4
> ===
> RCS file: /cvs/src/share/man/man4/wsdisplay.4,v
> retrieving revision 1.54
> diff -u -p -r1.54 wsdisplay.4
> --- wsdisplay.4   25 May 2019 11:02:19 -  1.54
> +++ wsdisplay.4   14 Sep 2020 09:56:03 -
> @@ -174,6 +174,38 @@ indicates the number of color map entrie
>  and
>  .Dv WSDISPLAYIO_PUTCMAP .
>  This call is likely to be unavailable on text-only displays.
> +.It Dv WSDISPLAYIO_GETSCREENTYPE Fa "struct wsdisplay_screentype"
> +Retrieve basic information about a screen.
> +The returned structure is as follows:
> +.Bd -literal -offset indent
> +struct wsdisplay_screentype {
> +int idx;
> +int nidx;
> +char name[WSSCREEN_NAME_SIZE];
> +int ncols, nrows;
> +int fontwidth, fontheight;
> +};
> +.Ed
> +.Pp
> +The
> +.Va idx
> +field indicates the index of the screen.
> +The
> +.Va nidx
> +field indicates the number of screens.
> +The
> +.Va name
> +field contains a human readable string used to identify the screen.
> +The 
> +.Va ncols
> +and
> +.Va  nrows
> +fields indicate the available number of columns and rows.
> +The 
> +.Va fontwidth
> +and
> +.Va fontheight
> +fields indicate the dimensions of a character cell and are counted in pixels.
>  .It Dv WSDISPLAYIO_GETCMAP Fa "struct wsdisplay_cmap"
>  Retrieve the current color map from the display.
>  This call needs the
> 
> 



Re: go/rust vs uvm_map_inentry()

2020-09-14 Thread Mark Kettenis
> Date: Sun, 13 Sep 2020 19:48:19 +0200
> From: Sebastien Marie 
> 
> On Sun, Sep 13, 2020 at 04:49:48PM +0200, Sebastien Marie wrote:
> > On Sun, Sep 13, 2020 at 03:29:57PM +0200, Martin Pieuchot wrote:
> > > I'm no longer able to reproduce the corruption while building lang/go
> > > with the diff below.  Something relevant to threading change in go since
> > > march?
> > > 
> > > Can someone try this diff and tell me if go and/or rust still fail?
> > 
> > quickly tested with rustc build (nightly here), and it is failing at random 
> > places (not always at the same) with memory errors (signal 11, compiler ICE 
> > signal 6...)
> > 
> 
> A first hint.
> 
> With the help of deraadt@, it was found that disabling
> uvm_map_inentry() call in usertrap() is enough to avoid the crashes.
> 
> To be clear, I am using the following diff:

The diff below fixes it (for amd64).

What's happening is that uvm_map_inentry() may sleep to grab the lock
of the map.  The fault address is read from cr2 in pageflttrap() which
gets called after this check and if the check sleeps, cr2 is likely to
be clobbered by a page fault in another process.

Diff below fixes this by reading cr2 early and passing it to pageflttrap().

ok?


Index: arch/amd64/amd64/trap.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v
retrieving revision 1.80
diff -u -p -r1.80 trap.c
--- arch/amd64/amd64/trap.c 19 Aug 2020 10:10:57 -  1.80
+++ arch/amd64/amd64/trap.c 14 Sep 2020 11:17:35 -
@@ -92,7 +92,7 @@
 
 #include "isa.h"
 
-intpageflttrap(struct trapframe *, int _usermode);
+intpageflttrap(struct trapframe *, uint64_t, int _usermode);
 void   kerntrap(struct trapframe *);
 void   usertrap(struct trapframe *);
 void   ast(struct trapframe *);
@@ -157,12 +157,11 @@ fault(const char *format, ...)
  * if something was so broken that we should panic.
  */
 int
-pageflttrap(struct trapframe *frame, int usermode)
+pageflttrap(struct trapframe *frame, uint64_t cr2, int usermode)
 {
struct proc *p = curproc;
struct pcb *pcb;
int error;
-   uint64_t cr2;
vaddr_t va;
struct vm_map *map;
vm_prot_t ftype;
@@ -172,7 +171,6 @@ pageflttrap(struct trapframe *frame, int
 
map = &p->p_vmspace->vm_map;
pcb = &p->p_addr->u_pcb;
-   cr2 = rcr2();
va = trunc_page((vaddr_t)cr2);
 
KERNEL_LOCK();
@@ -280,6 +278,7 @@ void
 kerntrap(struct trapframe *frame)
 {
int type = (int)frame->tf_trapno;
+   uint64_t cr2 = rcr2();
 
verify_smap(__func__);
uvmexp.traps++;
@@ -299,7 +298,7 @@ kerntrap(struct trapframe *frame)
/*NOTREACHED*/
 
case T_PAGEFLT: /* allow page faults in kernel mode */
-   if (pageflttrap(frame, 0))
+   if (pageflttrap(frame, cr2, 0))
return;
goto we_re_toast;
 
@@ -333,6 +332,7 @@ usertrap(struct trapframe *frame)
 {
struct proc *p = curproc;
int type = (int)frame->tf_trapno;
+   uint64_t cr2 = rcr2();
union sigval sv;
int sig, code;
 
@@ -381,7 +381,7 @@ usertrap(struct trapframe *frame)
break;
 
case T_PAGEFLT: /* page fault */
-   if (pageflttrap(frame, 1))
+   if (pageflttrap(frame, cr2, 1))
goto out;
/* FALLTHROUGH */
 



Re: go/rust vs uvm_map_inentry()

2020-09-13 Thread Mark Kettenis
> Date: Sun, 13 Sep 2020 17:54:18 +0200
> From: Martin Pieuchot 
> 
> On 13/09/20(Sun) 16:54, Mark Kettenis wrote:
> > > Date: Sun, 13 Sep 2020 16:49:48 +0200
> > > From: Sebastien Marie 
> > > 
> > > On Sun, Sep 13, 2020 at 03:29:57PM +0200, Martin Pieuchot wrote:
> > > > I'm no longer able to reproduce the corruption while building lang/go
> > > > with the diff below.  Something relevant to threading change in go since
> > > > march?
> > > > 
> > > > Can someone try this diff and tell me if go and/or rust still fail?
> > > 
> > > quickly tested with rustc build (nightly here), and it is failing at
> > > random places (not always at the same) with memory errors (signal
> > > 11, compiler ICE signal 6...)
> > 
> > Is it failing when you don't have tracing enabled and not failing when
> > the tracing is disabled perhaps?
> 
> It is failing even without tracing.

Sorry, I meant is it failing even with tracing?



Re: go/rust vs uvm_map_inentry()

2020-09-13 Thread Mark Kettenis
> From: "Theo de Raadt" 
> Date: Sun, 13 Sep 2020 08:56:04 -0600
> 
> Sebastien Marie  wrote:
> 
> > On Sun, Sep 13, 2020 at 03:29:57PM +0200, Martin Pieuchot wrote:
> > > I'm no longer able to reproduce the corruption while building lang/go
> > > with the diff below.  Something relevant to threading change in go since
> > > march?
> > > 
> > > Can someone try this diff and tell me if go and/or rust still fail?
> > 
> > quickly tested with rustc build (nightly here), and it is failing at random 
> > places (not always at the same) with memory errors (signal 11, compiler ICE 
> > signal 6...)
> 
> Ah, so that is a firm no.  Totally busted.
> 
> Clearly uvm_map_inentry_fix() obviously needs the KERNEL_LOCK in the
> presence of threads, I guess one thread can get into here while another
> is changing the map.
> 
> The first check in uvm_map_inentry_fix does two checks against the map,
> but the map is not locked:
> 
> if (addr < map->min_offset || addr >= map->max_offset)

No that should work; min_offset and max_offset are immutable after exec.

> > > Index: uvm/uvm_map.c
> > > ===
> > > RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> > > retrieving revision 1.266
> > > diff -u -p -r1.266 uvm_map.c
> > > --- uvm/uvm_map.c 12 Sep 2020 17:08:50 -  1.266
> > > +++ uvm/uvm_map.c 13 Sep 2020 10:12:25 -
> > > @@ -1893,16 +1893,16 @@ uvm_map_inentry(struct proc *p, struct p
> > >   boolean_t ok = TRUE;
> > >  
> > >   if (uvm_map_inentry_recheck(serial, addr, ie)) {
> > > - KERNEL_LOCK();
> > >   ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
> > >   if (!ok) {
> > > + KERNEL_LOCK();
> > >   printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
> > >   addr, ie->ie_start, ie->ie_end);
> > >   p->p_p->ps_acflag |= AMAP;
> > >   sv.sival_ptr = (void *)PROC_PC(p);
> > >   trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
> > > + KERNEL_UNLOCK();
> > >   }
> > > - KERNEL_UNLOCK();
> > >   }
> > >   return (ok);
> > >  }
> > > 
> > 
> > -- 
> > Sebastien Marie
> > 
> 
> 



Re: go/rust vs uvm_map_inentry()

2020-09-13 Thread Mark Kettenis
> Date: Sun, 13 Sep 2020 16:49:48 +0200
> From: Sebastien Marie 
> 
> On Sun, Sep 13, 2020 at 03:29:57PM +0200, Martin Pieuchot wrote:
> > I'm no longer able to reproduce the corruption while building lang/go
> > with the diff below.  Something relevant to threading change in go since
> > march?
> > 
> > Can someone try this diff and tell me if go and/or rust still fail?
> 
> quickly tested with rustc build (nightly here), and it is failing at
> random places (not always at the same) with memory errors (signal
> 11, compiler ICE signal 6...)

Is it failing when you don't have tracing enabled and not failing when
the tracing is disabled perhaps?

> > Index: uvm/uvm_map.c
> > ===
> > RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> > retrieving revision 1.266
> > diff -u -p -r1.266 uvm_map.c
> > --- uvm/uvm_map.c   12 Sep 2020 17:08:50 -  1.266
> > +++ uvm/uvm_map.c   13 Sep 2020 10:12:25 -
> > @@ -1893,16 +1893,16 @@ uvm_map_inentry(struct proc *p, struct p
> > boolean_t ok = TRUE;
> >  
> > if (uvm_map_inentry_recheck(serial, addr, ie)) {
> > -   KERNEL_LOCK();
> > ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
> > if (!ok) {
> > +   KERNEL_LOCK();
> > printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
> > addr, ie->ie_start, ie->ie_end);
> > p->p_p->ps_acflag |= AMAP;
> > sv.sival_ptr = (void *)PROC_PC(p);
> > trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
> > +   KERNEL_UNLOCK();
> > }
> > -   KERNEL_UNLOCK();
> > }
> > return (ok);
> >  }
> > 
> 
> -- 
> Sebastien Marie
> 
> 



Re: acpiapplesmc(4)

2020-09-12 Thread Mark Kettenis
> Date: Sat, 12 Sep 2020 12:48:48 +0200
> From: Marcus Glocker 
> Cc: m...@umaxx.net, j...@jcs.org, tech@openbsd.org
> Content-Type: text/plain; charset=utf-8
> 
> On Sat, 12 Sep 2020 10:28:23 +0200 (CEST)
> Mark Kettenis  wrote:
> 
> > > Date: Sat, 12 Sep 2020 10:00:13 +0200
> > > From: Marcus Glocker 
> > > 
> > > On Sat, 12 Sep 2020 09:34:18 +0200 (CEST)
> > > Mark Kettenis  wrote:
> > >   
> > > > > Date: Sat, 12 Sep 2020 09:20:26 +0200
> > > > > From: Marcus Glocker 
> > > > > 
> > > > > On Fri, 11 Sep 2020 23:18:56 +0200 (CEST)
> > > > > Mark Kettenis  wrote:
> > > > > 
> > > > > > > Date: Fri, 11 Sep 2020 17:42:23 +0200
> > > > > > > From: Marcus Glocker 
> > > > > > > 
> > > > > > > On Thu, 10 Sep 2020 23:44:38 +0200
> > > > > > > Joerg Jung  wrote:
> > > > > > >   
> > > > > > > > Don’t give up so quickly ;) 
> > > > > > > > let’s try to make the driver work on your iMac, send me
> > > > > > > > dmesg and sysctl hw output please.
> > > > > > > > 
> > > > > > > > Your idea of converting it to ACPI is the right thing to
> > > > > > > > do anyways, would be nice to get this working.  
> > > > > > > 
> > > > > > > Here we go:
> > > > > > > 
> > > > > > > $ dmesg | grep smc
> > > > > > > asmc0 at acpi0: SMC_ (smc-piketon) addr 0x300/0x20: rev
> > > > > > > 1.64f564, 276 keys
> > > > > > > 
> > > > > > > $ sysctl -a | grep smc
> > > > > > > hw.sensors.asmc0.temp0=27.00 degC (TA0P ambient)
> > > > > > > hw.sensors.asmc0.temp1=42.00 degC (TC0H cpu0 heatsink)
> > > > > > > hw.sensors.asmc0.temp2=55.00 degC (TG0D gpu0 diode)
> > > > > > > hw.sensors.asmc0.temp3=53.00 degC (TG0H gpu0 heatsink)
> > > > > > > hw.sensors.asmc0.temp4=38.00 degC (TL0P lcd proximity)
> > > > > > > hw.sensors.asmc0.temp5=41.00 degC (TO0P optical drive)
> > > > > > > hw.sensors.asmc0.temp6=50.00 degC (Tm0P memory controller)
> > > > > > > hw.sensors.asmc0.fan0=998 RPM (ODD, right mid rear)
> > > > > > > hw.sensors.asmc0.fan1=1158 RPM (HDD, center mid rear)
> > > > > > > hw.sensors.asmc0.fan2=1200 RPM (CPU, left lower rear)
> > > > > > > 
> > > > > > > Does that work for you guys?  
> > > > > > 
> > > > > > $ dmesg | grep smc
> > > > > > asmc0 at acpi0: SMC_ (smc-napa) addr 0x300/0x20: rev 1.3f503,
> > > > > > 137 keys
> > > > > > 
> > > > > > $ sysctl -a | grep smc
> > > > > > hw.sensors.asmc0.temp0=63.00 degC (TC0D cpu0 die core)
> > > > > > hw.sensors.asmc0.temp1=55.00 degC (TC0H cpu0 heatsink)
> > > > > > hw.sensors.asmc0.temp2=58.00 degC (TC0P cpu0 proximity)
> > > > > > hw.sensors.asmc0.temp3=52.00 degC (TN0P northbridge proximity)
> > > > > > hw.sensors.asmc0.temp4=52.00 degC (TN1P northbridge 2)
> > > > > > hw.sensors.asmc0.fan0=2077 RPM (Master, left upper front)
> > > > > > 
> > > > > > So yes, this works for me.
> > > > > 
> > > > > Cool.
> > > > >  
> > > > > > You'll need to make changes to the i386 GENERIC kernel as
> > > > > > well.
> > > > > 
> > > > > Yep, done.
> > > > > 
> > > > > > And I'd like to ask you to make one small change...
> > > > > 
> > > > > > > +const char *acpiapplesmc_hids[] = {  
> > > > > > 
> > > > > > ...can you rename this variable to asmc_hids[]?
> > > > > 
> > > > > Of course, copy/pasto, thanks for spotting.
> > > > > 
> > > > > Also Joerg did suggest to hard code smc0 in GENERIC like it was
> > > > > before, since it won't be possible that there is more than 1 SMC
> > > > > available on a machine.
> > > > 
> > > > Meh.  We tend to only do that if there is a fundamental reason why
> > > > there can only be one.  Bu

Re: acpiapplesmc(4)

2020-09-12 Thread Mark Kettenis
> Date: Sat, 12 Sep 2020 10:00:13 +0200
> From: Marcus Glocker 
> 
> On Sat, 12 Sep 2020 09:34:18 +0200 (CEST)
> Mark Kettenis  wrote:
> 
> > > Date: Sat, 12 Sep 2020 09:20:26 +0200
> > > From: Marcus Glocker 
> > > 
> > > On Fri, 11 Sep 2020 23:18:56 +0200 (CEST)
> > > Mark Kettenis  wrote:
> > >   
> > > > > Date: Fri, 11 Sep 2020 17:42:23 +0200
> > > > > From: Marcus Glocker 
> > > > > 
> > > > > On Thu, 10 Sep 2020 23:44:38 +0200
> > > > > Joerg Jung  wrote:
> > > > > 
> > > > > > Don’t give up so quickly ;) 
> > > > > > let’s try to make the driver work on your iMac, send me dmesg
> > > > > > and sysctl hw output please.
> > > > > > 
> > > > > > Your idea of converting it to ACPI is the right thing to do
> > > > > > anyways, would be nice to get this working.
> > > > > 
> > > > > Here we go:
> > > > > 
> > > > > $ dmesg | grep smc
> > > > > asmc0 at acpi0: SMC_ (smc-piketon) addr 0x300/0x20: rev
> > > > > 1.64f564, 276 keys
> > > > > 
> > > > > $ sysctl -a | grep smc
> > > > > hw.sensors.asmc0.temp0=27.00 degC (TA0P ambient)
> > > > > hw.sensors.asmc0.temp1=42.00 degC (TC0H cpu0 heatsink)
> > > > > hw.sensors.asmc0.temp2=55.00 degC (TG0D gpu0 diode)
> > > > > hw.sensors.asmc0.temp3=53.00 degC (TG0H gpu0 heatsink)
> > > > > hw.sensors.asmc0.temp4=38.00 degC (TL0P lcd proximity)
> > > > > hw.sensors.asmc0.temp5=41.00 degC (TO0P optical drive)
> > > > > hw.sensors.asmc0.temp6=50.00 degC (Tm0P memory controller)
> > > > > hw.sensors.asmc0.fan0=998 RPM (ODD, right mid rear)
> > > > > hw.sensors.asmc0.fan1=1158 RPM (HDD, center mid rear)
> > > > > hw.sensors.asmc0.fan2=1200 RPM (CPU, left lower rear)
> > > > > 
> > > > > Does that work for you guys?
> > > > 
> > > > $ dmesg | grep smc
> > > > asmc0 at acpi0: SMC_ (smc-napa) addr 0x300/0x20: rev 1.3f503, 137
> > > > keys
> > > > 
> > > > $ sysctl -a | grep smc
> > > > hw.sensors.asmc0.temp0=63.00 degC (TC0D cpu0 die core)
> > > > hw.sensors.asmc0.temp1=55.00 degC (TC0H cpu0 heatsink)
> > > > hw.sensors.asmc0.temp2=58.00 degC (TC0P cpu0 proximity)
> > > > hw.sensors.asmc0.temp3=52.00 degC (TN0P northbridge proximity)
> > > > hw.sensors.asmc0.temp4=52.00 degC (TN1P northbridge 2)
> > > > hw.sensors.asmc0.fan0=2077 RPM (Master, left upper front)
> > > > 
> > > > So yes, this works for me.  
> > > 
> > > Cool.
> > >
> > > > You'll need to make changes to the i386 GENERIC kernel as well.  
> > > 
> > > Yep, done.
> > >   
> > > > And I'd like to ask you to make one small change...  
> > >   
> > > > > +const char *acpiapplesmc_hids[] = {
> > > > 
> > > > ...can you rename this variable to asmc_hids[]?  
> > > 
> > > Of course, copy/pasto, thanks for spotting.
> > > 
> > > Also Joerg did suggest to hard code smc0 in GENERIC like it was
> > > before, since it won't be possible that there is more than 1 SMC
> > > available on a machine.  
> > 
> > Meh.  We tend to only do that if there is a fundamental reason why
> > there can only be one.  But it doesn't hurt.
> 
> Well, yeah, would be my initial preference as well.  Maybe Joerg can
> give a further explanation to underline why only one SMC can be found.
> If we are unsure we still can change it to 'asmc*'.

In practice there will only be one.  But there is nothing in the ACPI
driver to prevent it from supporting multiple SMC chips.

For the ISA driver it made sense to have asmc0 at isa? since there can
only be one at the specified address.

> > ok kettenis@
> 
> Thanks.
> 



Re: acpiapplesmc(4)

2020-09-12 Thread Mark Kettenis
> Date: Sat, 12 Sep 2020 09:20:26 +0200
> From: Marcus Glocker 
> 
> On Fri, 11 Sep 2020 23:18:56 +0200 (CEST)
> Mark Kettenis  wrote:
> 
> > > Date: Fri, 11 Sep 2020 17:42:23 +0200
> > > From: Marcus Glocker 
> > > 
> > > On Thu, 10 Sep 2020 23:44:38 +0200
> > > Joerg Jung  wrote:
> > >   
> > > > Don’t give up so quickly ;) 
> > > > let’s try to make the driver work on your iMac, send me dmesg and
> > > > sysctl hw output please.
> > > > 
> > > > Your idea of converting it to ACPI is the right thing to do
> > > > anyways, would be nice to get this working.  
> > > 
> > > Here we go:
> > > 
> > > $ dmesg | grep smc
> > > asmc0 at acpi0: SMC_ (smc-piketon) addr 0x300/0x20: rev 1.64f564,
> > > 276 keys
> > > 
> > > $ sysctl -a | grep smc
> > > hw.sensors.asmc0.temp0=27.00 degC (TA0P ambient)
> > > hw.sensors.asmc0.temp1=42.00 degC (TC0H cpu0 heatsink)
> > > hw.sensors.asmc0.temp2=55.00 degC (TG0D gpu0 diode)
> > > hw.sensors.asmc0.temp3=53.00 degC (TG0H gpu0 heatsink)
> > > hw.sensors.asmc0.temp4=38.00 degC (TL0P lcd proximity)
> > > hw.sensors.asmc0.temp5=41.00 degC (TO0P optical drive)
> > > hw.sensors.asmc0.temp6=50.00 degC (Tm0P memory controller)
> > > hw.sensors.asmc0.fan0=998 RPM (ODD, right mid rear)
> > > hw.sensors.asmc0.fan1=1158 RPM (HDD, center mid rear)
> > > hw.sensors.asmc0.fan2=1200 RPM (CPU, left lower rear)
> > > 
> > > Does that work for you guys?  
> > 
> > $ dmesg | grep smc
> > asmc0 at acpi0: SMC_ (smc-napa) addr 0x300/0x20: rev 1.3f503, 137 keys
> > 
> > $ sysctl -a | grep smc
> > hw.sensors.asmc0.temp0=63.00 degC (TC0D cpu0 die core)
> > hw.sensors.asmc0.temp1=55.00 degC (TC0H cpu0 heatsink)
> > hw.sensors.asmc0.temp2=58.00 degC (TC0P cpu0 proximity)
> > hw.sensors.asmc0.temp3=52.00 degC (TN0P northbridge proximity)
> > hw.sensors.asmc0.temp4=52.00 degC (TN1P northbridge 2)
> > hw.sensors.asmc0.fan0=2077 RPM (Master, left upper front)
> > 
> > So yes, this works for me.
> 
> Cool.
>  
> > You'll need to make changes to the i386 GENERIC kernel as well.
> 
> Yep, done.
> 
> > And I'd like to ask you to make one small change...
> 
> > > +const char *acpiapplesmc_hids[] = {  
> > 
> > ...can you rename this variable to asmc_hids[]?
> 
> Of course, copy/pasto, thanks for spotting.
> 
> Also Joerg did suggest to hard code smc0 in GENERIC like it was before,
> since it won't be possible that there is more than 1 SMC available on a
> machine.

Meh.  We tend to only do that if there is a fundamental reason why
there can only be one.  But it doesn't hurt.

ok kettenis@

> Index: sys/arch/amd64/conf/GENERIC
> ===
> RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v
> retrieving revision 1.490
> diff -u -p -u -p -r1.490 GENERIC
> --- sys/arch/amd64/conf/GENERIC   2 Jun 2020 16:24:24 -   1.490
> +++ sys/arch/amd64/conf/GENERIC   12 Sep 2020 07:17:29 -
> @@ -68,6 +68,7 @@ glkgpio*at acpi?
>  sdhc*at acpi?
>  acpicbkbd*   at acpi?
>  acpials* at acpi?
> +asmc0at acpi?# Apple SMC
>  tpm* at acpi?
>  acpihve* at acpi?
>  acpisurface* at acpi?
> @@ -132,7 +133,6 @@ lm*   at wbsio?
>  uguru0   at isa? disable port 0xe0   # ABIT uGuru
>  
>  aps0 at isa? port 0x1600 # ThinkPad Active Protection System
> -asmc0at isa? port 0x300  # Apple SMC
>  
>  piixpm*  at pci? # Intel PIIX PM
>  iic* at piixpm?
> Index: sys/arch/i386/conf/GENERIC
> ===
> RCS file: /cvs/src/sys/arch/i386/conf/GENERIC,v
> retrieving revision 1.851
> diff -u -p -u -p -r1.851 GENERIC
> --- sys/arch/i386/conf/GENERIC22 Jun 2020 00:33:28 -  1.851
> +++ sys/arch/i386/conf/GENERIC12 Sep 2020 07:17:30 -
> @@ -62,6 +62,7 @@ acpivideo*  at acpi?
>  acpivout*at acpivideo?
>  acpipwrres*  at acpi?
>  aibs*at acpi?
> +asmc0at acpi?# Apple SMC
>  
>  option   PCIVERBOSE
>  option   EISAVERBOSE
> @@ -148,7 +149,6 @@ uguru0at isa? disable port 0xe0   # ABIT 
>  fins0at isa? port 0x4e   # Fintek F71805 Super I/O
>  
>  aps0 at isa? port 0x1600 # ThinkPad Active Protection System
> -asmc0at isa? port 0x300  # Apple SMC
>  
>  ither

Re: acpiapplesmc(4)

2020-09-11 Thread Mark Kettenis
> Date: Fri, 11 Sep 2020 17:42:23 +0200
> From: Marcus Glocker 
> 
> On Thu, 10 Sep 2020 23:44:38 +0200
> Joerg Jung  wrote:
> 
> > Don’t give up so quickly ;) 
> > let’s try to make the driver work on your iMac, send me dmesg and
> > sysctl hw output please.
> > 
> > Your idea of converting it to ACPI is the right thing to do anyways,
> > would be nice to get this working.
> 
> Here we go:
> 
> $ dmesg | grep smc
> asmc0 at acpi0: SMC_ (smc-piketon) addr 0x300/0x20: rev 1.64f564, 276
> keys
> 
> $ sysctl -a | grep smc
> hw.sensors.asmc0.temp0=27.00 degC (TA0P ambient)
> hw.sensors.asmc0.temp1=42.00 degC (TC0H cpu0 heatsink)
> hw.sensors.asmc0.temp2=55.00 degC (TG0D gpu0 diode)
> hw.sensors.asmc0.temp3=53.00 degC (TG0H gpu0 heatsink)
> hw.sensors.asmc0.temp4=38.00 degC (TL0P lcd proximity)
> hw.sensors.asmc0.temp5=41.00 degC (TO0P optical drive)
> hw.sensors.asmc0.temp6=50.00 degC (Tm0P memory controller)
> hw.sensors.asmc0.fan0=998 RPM (ODD, right mid rear)
> hw.sensors.asmc0.fan1=1158 RPM (HDD, center mid rear)
> hw.sensors.asmc0.fan2=1200 RPM (CPU, left lower rear)
> 
> Does that work for you guys?

$ dmesg | grep smc
asmc0 at acpi0: SMC_ (smc-napa) addr 0x300/0x20: rev 1.3f503, 137 keys

$ sysctl -a | grep smc
hw.sensors.asmc0.temp0=63.00 degC (TC0D cpu0 die core)
hw.sensors.asmc0.temp1=55.00 degC (TC0H cpu0 heatsink)
hw.sensors.asmc0.temp2=58.00 degC (TC0P cpu0 proximity)
hw.sensors.asmc0.temp3=52.00 degC (TN0P northbridge proximity)
hw.sensors.asmc0.temp4=52.00 degC (TN1P northbridge 2)
hw.sensors.asmc0.fan0=2077 RPM (Master, left upper front)

So yes, this works for me.

You'll need to make changes to the i386 GENERIC kernel as well.

And I'd like to ask you to make one small change...

> Index: sys/dev/acpi/asmc.c
> ===
> RCS file: sys/dev/acpi/asmc.c
> diff -N sys/dev/acpi/asmc.c
> --- /dev/null 1 Jan 1970 00:00:00 -
> +++ sys/dev/acpi/asmc.c   11 Sep 2020 15:32:38 -
> @@ -0,0 +1,744 @@
> +/*   $OpenBSD: asmc.c,v 1.33 2020/08/26 03:29:06 visa Exp $  */
> +/*
> + * Copyright (c) 2015 Joerg Jung 
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +/*
> + * Driver for Apple's System Management Controller (SMC) an H8S/2117 chip
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +#define ASMC_BASE0x300   /* SMC base address */
> +#define ASMC_IOSIZE  32  /* I/O region size 0x300-0x31f */
> +
> +#define ASMC_DATA0x00/* SMC data port offset */
> +#define ASMC_COMMAND 0x04/* SMC command port offset */
> +#define ASMC_STATUS  0x1e/* SMC status port offset */
> +#define ASMC_INTERRUPT   0x1f/* SMC interrupt port offset */
> +
> +#define ASMC_READ0x10/* SMC read command */
> +#define ASMC_WRITE   0x11/* SMC write command */
> +#define ASMC_INFO0x13/* SMC info/type command */
> +
> +#define ASMC_OBF 0x01/* Output buffer full */
> +#define ASMC_IBF 0x02/* Input buffer full */
> +#define ASMC_ACCEPT  0x04
> +
> +#define ASMC_RETRY   3
> +#define ASMC_MAXLEN  32  /* SMC maximum data size len */
> +#define ASMC_NOTFOUND0x84/* SMC status key not found */
> +
> +#define ASMC_MAXTEMP 101 /* known asmc_prods temperature sensor keys */
> +#define ASMC_MAXFAN  10  /* fan keys with digits 0-9 */
> +#define ASMC_MAXLIGHT2   /* left and right light sensor */
> +#define ASMC_MAXMOTION   3   /* x y z axis motion sensors */
> +
> +struct asmc_prod {
> + const char  *pr_name;
> + uint8_t  pr_light;
> + const char  *pr_temp[ASMC_MAXTEMP];
> +};
> +
> +struct asmc_softc {
> + struct devicesc_dev;
> +
> + struct acpi_softc   *sc_acpi;
> + struct aml_node *sc_devnode;
> +
> + bus_space_tag_t  sc_iot;
> + bus_space_handle_t   sc_ioh;
> +
> + struct asmc_prod*sc_prod;
> + uint8_t  sc_nfans;  /* number of fans */
> + uint8_t  sc_lightlen;   /* light data len */
> + uint8_t  sc_backlight;  /* keyboard backlight value */
> +
> + 

Re: UVM tracepoints for dt(4)

2020-09-11 Thread Mark Kettenis
> Date: Fri, 11 Sep 2020 10:23:34 +0200
> From: Martin Pieuchot 
> 
> To investigate the race exposed by the last locking change in
> uvm_map_inentry() [0], I'd like to add the following tracepoints.
> 
> The idea is to compare page fault addresses and permissions with
> the insertion/removal of entries in a given map.  Diff below is
> the first part of the puzzle, ok?
> 
> [0] https://marc.info/?l=openbsd-tech&m=157293690312531&w=2
> 
> An example of bt(5) script using those tracepoints looks like this:
> 
>   tracepoint:uvm:fault {
>   printf("%s:%d(%s) fault   0x%x type=0x%x, prot=0x%x\n",
>   nsecs, tid, comm, arg0, arg1, arg2);
>   }
>   tracepoint:uvm:map_insert {
>   printf("%s:%d(%s) insert [0x%x, 0x%x), prot=0x%x\n",
>nsecs, tid, comm, arg0, arg1, arg2);
>   }
>   tracepoint:uvm:map_remove {
>   printf("%s:%d(%s) remove [0x%x, 0x%x) prot=0x%x\n",
>nsecs, tid, comm, arg0, arg1, arg2);
>   }

Having more examples will help people to make use of this functionality.

ok kettenis@

> Index: uvm/uvm_fault.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.97
> diff -u -p -r1.97 uvm_fault.c
> --- uvm/uvm_fault.c   8 Dec 2019 12:37:45 -   1.97
> +++ uvm/uvm_fault.c   11 Sep 2020 07:16:01 -
> @@ -38,6 +38,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  
> @@ -507,6 +508,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
>   pg = NULL;
>  
>   uvmexp.faults++;/* XXX: locking? */
> + TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL);
>  
>   /* init the IN parameters in the ufi */
>   ufi.orig_map = orig_map;
> Index: uvm/uvm_map.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.265
> diff -u -p -r1.265 uvm_map.c
> --- uvm/uvm_map.c 6 Jul 2020 19:22:40 -   1.265
> +++ uvm/uvm_map.c 11 Sep 2020 07:41:53 -
> @@ -95,6 +95,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #ifdef SYSVSHM
>  #include 
> @@ -455,6 +456,9 @@ uvm_mapent_addr_insert(struct vm_map *ma
>   KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
>   (entry->end & (vaddr_t)PAGE_MASK) == 0);
>  
> + TRACEPOINT(uvm, map_insert,
> + entry->start, entry->end, entry->protection, NULL);
> +
>   UVM_MAP_REQ_WRITE(map);
>   res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
>   if (res != NULL) {
> @@ -474,6 +478,9 @@ void
>  uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
>  {
>   struct vm_map_entry *res;
> +
> + TRACEPOINT(uvm, map_remove,
> + entry->start, entry->end, entry->protection, NULL);
>  
>   UVM_MAP_REQ_WRITE(map);
>   res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
> Index: dev/dt/dt_prov_static.c
> ===
> RCS file: /cvs/src/sys/dev/dt/dt_prov_static.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 dt_prov_static.c
> --- dev/dt/dt_prov_static.c   25 Mar 2020 14:59:23 -  1.2
> +++ dev/dt/dt_prov_static.c   11 Sep 2020 07:43:58 -
> @@ -34,7 +34,7 @@ struct dt_provider dt_prov_static = {
>  };
>  
>  /*
> - * Scheduler provider
> + * Scheduler
>   */
>  DT_STATIC_PROBE2(sched, dequeue, "pid_t", "pid_t");
>  DT_STATIC_PROBE2(sched, enqueue, "pid_t", "pid_t");
> @@ -51,6 +51,13 @@ DT_STATIC_PROBE1(raw_syscalls, sys_enter
>  DT_STATIC_PROBE1(raw_syscalls, sys_exit, "register_t");
>  
>  /*
> + * UVM
> + */
> +DT_STATIC_PROBE3(uvm, fault, "vaddr_t", "vm_fault_t", "vm_prot_t");
> +DT_STATIC_PROBE3(uvm, map_insert, "vaddr_t", "vaddr_t", "vm_prot_t");
> +DT_STATIC_PROBE3(uvm, map_remove, "vaddr_t", "vaddr_t", "vm_prot_t");
> +
> +/*
>   * List of all static probes
>   */
>  struct dt_probe *dtps_static[] = {
> @@ -65,6 +72,10 @@ struct dt_probe *dtps_static[] = {
>   /* Raw syscalls */
>   &_DT_STATIC_P(raw_syscalls, sys_enter),
>   &_DT_STATIC_P(raw_syscalls, sys_exit),
> + /* UVM */
> + &_DT_STATIC_P(uvm, fault),
> + &_DT_STATIC_P(uvm, map_insert),
> + &_DT_STATIC_P(uvm, map_remove),
>  };
>  
>  int
> 
> 



Re: acpiapplesmc(4)

2020-09-10 Thread Mark Kettenis
> Date: Thu, 10 Sep 2020 14:07:39 +0200
> From: Marcus Glocker 
> Cc: j...@jcs.org, tech@openbsd.org
> Content-Type: text/plain; charset=US-ASCII
> 
> On Mon, 7 Sep 2020 21:39:55 +0200
> Marcus Glocker  wrote:
> 
> > On Mon, 7 Sep 2020 20:50:20 +0200 (CEST)
> > Mark Kettenis  wrote:
> > 
> > > > Date: Mon, 7 Sep 2020 19:59:13 +0200
> > > > From: Marcus Glocker 
> > > > 
> > > > On Mon, 7 Sep 2020 19:25:00 +0200 (CEST)
> > > > Mark Kettenis  wrote:
> > > > 
> > > > > > Date: Mon, 7 Sep 2020 12:02:15 -0500
> > > > > > From: joshua stein 
> > > > > > 
> > > > > > On Mon, 07 Sep 2020 at 06:58:01 +0200, Marcus Glocker wrote:
> > > > > > 
> > > > > > > This is an initial driver for the Apple System Management
> > > > > > > Controller found in Intel based Apple computers.
> > > > > > > 
> > > > > > > The driver is currently missing support for the Sudden
> > > > > > > Motion Sensor (SMS), light sensor, and keyboard backlight
> > > > > > > since I don't have that hardware available to develop on.
> > > > > > > 
> > > > > > > On my iMac11,2 it can deliver fan and temperatures values:
> > > > > > > 
> > > > > > >   hw.sensors.acpiapplesmc0.temp0=24.00 degC (Airflow
> > > > > > > 1) hw.sensors.acpiapplesmc0.temp1=33.00 degC (CPU Core 0)
> > > > > > >   hw.sensors.acpiapplesmc0.temp2=36.00 degC (CPU
> > > > > > > Heatsink) hw.sensors.acpiapplesmc0.temp3=40.00 degC (CPU
> > > > > > > Core 1) hw.sensors.acpiapplesmc0.temp4=47.00 degC (GPU)
> > > > > > >   hw.sensors.acpiapplesmc0.temp5=45.00 degC (GPU
> > > > > > > Heatsink) hw.sensors.acpiapplesmc0.temp6=59.00 degC (PCH)
> > > > > > >   hw.sensors.acpiapplesmc0.temp7=42.00 degC (Memory)
> > > > > > >   hw.sensors.acpiapplesmc0.temp8=45.00 degC (Mainboard
> > > > > > > Proximity) hw.sensors.acpiapplesmc0.fan0=998 RPM
> > > > > > >   hw.sensors.acpiapplesmc0.fan1=1132 RPM
> > > > > > >   hw.sensors.acpiapplesmc0.fan2=1198 RPM
> > > > > > > 
> > > > > > > Feedback, testers, OKs?  
> > > > > > 
> > > > > > Are there machines where asmc(4) will also attach?  
> > > > > 
> > > > > Good point.  My old Macmini1,1 has:
> > > > > 
> > > > > ...
> > > > > "APP0001" at acpi0 not configured
> > > > > ...
> > > > > asmc0 at isa0 port 0x300/32: rev 1.3f503, 137 keys
> > > > > ...
> > > > > 
> > > > > So yes, I'd say there are.
> > > > > 
> > > > > 
> > > > > Having an acpi attachment is probably better than doing isa
> > > > > probes. But we probably should consolidate the drivers.
> > > > 
> > > > D'oh!  I wasn't even aware that we already have an asmc(4) driver
> > > > in our tree.  Shame on me :-|
> > > > 
> > > > Glancing over asmc(4) I don't think there is anything more that my
> > > > driver would support other than attaching over acpi(4).  Would it
> > > > be possible to only write an acpi glue which attaches to asmc(4)?
> > > >
> > > 
> > > I think we'd just want to turn it into an acpi(4) driver.  Or maybe
> > > dump it in favour of your driver.  
> > 
> > Ok.  I'll give it a try to convert asmc(4) in to an acpi(4) driver and
> > see how it works here.
> 
> I can make asmc(4) attach through acpi(4) on my machine, but then it
> crashes with uvm fault because it doesn't seem to recognize my machine
> type, and tries to access a NULL pointer on 'sc_prod'.
> 
> My approach was more to not rely on specific machine types, but just
> check what sensors are supported by the SMC and try to use them.  I
> lack a bit motivation to add specific models to asmc(4) to be honest.
> 
> One question just out of curiosity;  When you use acpiapplesmc(4) on
> your iMac, does it support any sensors there, or even does it work at
> all?

So this is what I get with asmc(4):

hw.sensors.cpu0.temp0=54.00 degC
hw.sensors.asmc0.temp0=60.00 degC (TC0D cpu0 die core)
hw.sensors.asmc0.temp1=52.00 degC (TC0H cpu0 heatsink)
hw.sensors.asmc0.temp2=54.00 degC (TC0P cpu0 proximity)
hw.

Re: timeout(9): add clock-based timeouts (attempt 2)

2020-09-08 Thread Mark Kettenis
> Date: Mon, 7 Sep 2020 18:50:44 -0500
> From: Scott Cheloha 
> 
> On Sat, Sep 05, 2020 at 01:11:59PM +0200, Mark Kettenis wrote:
> > > Date: Fri, 4 Sep 2020 17:55:39 -0500
> > > From: Scott Cheloha 
> > > 
> > > On Sat, Jul 25, 2020 at 08:46:08PM -0500, Scott Cheloha wrote:
> > > > 
> > > > [...]
> > > > 
> > > > I want to add clock-based timeouts to the kernel because tick-based
> > > > timeouts suffer from a few problems:
> > > > 
> > > > [...]
> > > > 
> > > > Basically, ticks are a poor approximation for the system clock.  We
> > > > should use the real thing where possible.
> > > > 
> > > > [...]
> > > > 
> > > > Thoughts on this approach?  Thoughts on the proposed API?
> > > 
> > > 6 week bump.
> > > 
> > > Attached is a rebased and streamlined diff.
> > > 
> > > Let's try again:
> > > 
> > > This patch adds support for timeouts scheduled against the hardware
> > > timecounter.  I call these "kclock timeouts".  They are distinct from
> > > the current tick-based timeouts because ticks are "software time", not
> > > "real time".
> > 
> > So what's the end game here?  Are these kclock-based timeouts going to
> > replace the tick-based timeouts at some point in the future?  I can
> > see why you want to have both in parallel for a while, but long-term I
> > don't think we want to keep both.
> 
> Ideally we would replace tick-based timeouts entirely with kclock
> timeouts eventually.
> 
> There are a few roadblocks, though:
> 
> 1. The scheduler is tick-based.  If you want to wait until the next
>tick, the easiest way to do that is with timeout_add(9) or tsleep(9).

I don't think this really matters in most cases.  Keeping the tick as
the base for a scheduling quantum is probably wise for now, but I
don't think it matters that timeouts and tsleeps (especially tsleeps)
are actually synchronized to the scheduling clock.

> 2. Linux has ktimers, which is tick-based.  drm uses it.  Shouldn't
>we have a tick-based timeout interface for compatibility with them?
>We could fake it, like FreeBSD does, but doing so is probably more
>complicated than just keeping support for tick-based timeouts.

You can easily emulate this using an absolute timer that you keep
rescheduling.  I think that is preferable to keeping a complete
separate tick-based timeout system.

> 3. Scheduling a timeout with timeout_add(9) is fast.  Scheduling a
>timeout with timeout_in_nsec(9) involves a clock read.  It is slower.
>It is probably too slow for some code.
> 
> (1) will be overcome if ever the scheduler is no longer tick-based.
> 
> (2) is tricky.  Maybe you or jsg@ have an opinion?

Not really.  But I don't think the Linux ktimers tick at the same rate
as ours so I don't think it matters.

> (3) is somewhat easier to fix.  I intend to introduce a TIMEOUT_COARSE
> flag in the future which causes timeout_in_nsec() to call
> getnanouptime(9) instead of nanouptime(9).  Reading the timestamp is
> faster than reading the clock.  You lose accuracy, but any code
> worried about the overhead of reading the clock is probably not very
> concerned with accuracy.

Right.

> > We don't really want to do a wholesale conversion of APIs again I'd
> > say.  So at some point the existing timeout_add_xxx() calls should be
> > implemented in terms of "kclock timeouts".
> 
> We can do this, but we'll still need to change the calls that
> reschedule a periodic timeout to use the dedicated rescheduling
> interface.  Otherwise those periodic timeouts will drift.  They don't
> currently drift because a tick is a very coarse unit of time.  With
> nanosecond resolution we'll get drift.

Periodic timeouts are rare.  At least those that care about drift.

> > This implementation is still tick driven, so it doesn't really provide
> > sub-tick resolution.
> 
> Yes, that's right.  Each timeout maintains nanosecond resolution for
> its expiration time but will only actually run after hardclock(9) runs
> and dumps the timeout to softclock().
> 
> We would need to implement a more flexible clock interrupt scheduler
> to run timeouts in between hardclocks.
> 
> > What does that mean for testing this?  I mean if we spend a lot of time
> > now to verify that subsystems can tolerate the more fine-grained timeouts,
> > we need to do that again when you switch from having a periodic interrupt driving
> > the wheel to having a scheduled interrupt isn't it?
> 
> Yes.  But both changes can break things.

Re: PATCH: Fix PCI Config Space union size on VMM

2020-09-08 Thread Mark Kettenis
> Date: Mon, 7 Sep 2020 17:52:55 -0500
> From: Jordan Hargrave 

Yes that would be better.  The usage of __packed here is questionable,
but that is not your fault.

ok kettenis@

> Index: pci.h
> ===
> RCS file: /cvs/src/usr.sbin/vmd/pci.h,v
> retrieving revision 1.7
> diff -u -p -u -r1.7 pci.h
> --- pci.h 17 Sep 2017 23:07:56 -  1.7
> +++ pci.h 7 Sep 2020 22:48:09 -
> @@ -32,43 +32,44 @@ typedef int (*pci_iobar_fn_t)(int dir, u
>  void *, uint8_t);
>  typedef int (*pci_mmiobar_fn_t)(int dir, uint32_t ofs, uint32_t *data);
>  
> -union pci_dev {
> - uint32_t pd_cfg_space[PCI_CONFIG_SPACE_SIZE / 4];
>  
> - struct {
> - uint16_t pd_vid;
> - uint16_t pd_did;
> - uint16_t pd_cmd;
> - uint16_t pd_status;
> - uint8_t pd_rev;
> - uint8_t pd_prog_if;
> - uint8_t pd_subclass;
> - uint8_t pd_class;
> - uint8_t pd_cache_size;
> - uint8_t pd_lat_timer;
> - uint8_t pd_header_type;
> - uint8_t pd_bist;
> - uint32_t pd_bar[PCI_MAX_BARS];
> - uint32_t pd_cardbus_cis;
> - uint16_t pd_subsys_vid;
> - uint16_t pd_subsys_id;
> - uint32_t pd_exp_rom_addr;
> - uint8_t pd_cap;
> - uint32_t pd_reserved0 : 24;
> - uint32_t pd_reserved1;
> - uint8_t pd_irq;
> - uint8_t pd_int;
> - uint8_t pd_min_grant;
> - uint8_t pd_max_grant;
> +struct pci_dev {
> + union {
> + uint32_t pd_cfg_space[PCI_CONFIG_SPACE_SIZE / 4];
> + struct {
> + uint16_t pd_vid;
> + uint16_t pd_did;
> + uint16_t pd_cmd;
> + uint16_t pd_status;
> + uint8_t pd_rev;
> + uint8_t pd_prog_if;
> + uint8_t pd_subclass;
> + uint8_t pd_class;
> + uint8_t pd_cache_size;
> + uint8_t pd_lat_timer;
> + uint8_t pd_header_type;
> + uint8_t pd_bist;
> + uint32_t pd_bar[PCI_MAX_BARS];
> + uint32_t pd_cardbus_cis;
> + uint16_t pd_subsys_vid;
> + uint16_t pd_subsys_id;
> + uint32_t pd_exp_rom_addr;
> + uint8_t pd_cap;
> + uint32_t pd_reserved0 : 24;
> + uint32_t pd_reserved1;
> + uint8_t pd_irq;
> + uint8_t pd_int;
> + uint8_t pd_min_grant;
> + uint8_t pd_max_grant;
> + } __packed;
> + };
> + uint8_t pd_bar_ct;
> + pci_cs_fn_t pd_csfunc;
>  
> - uint8_t pd_bar_ct;
> - pci_cs_fn_t pd_csfunc;
> -
> - uint8_t pd_bartype[PCI_MAX_BARS];
> - uint32_t pd_barsize[PCI_MAX_BARS];
> - void *pd_barfunc[PCI_MAX_BARS];
> - void *pd_bar_cookie[PCI_MAX_BARS];
> - } __packed;
> + uint8_t pd_bartype[PCI_MAX_BARS];
> + uint32_t pd_barsize[PCI_MAX_BARS];
> + void *pd_barfunc[PCI_MAX_BARS];
> + void *pd_bar_cookie[PCI_MAX_BARS];
>  };
>  
>  struct pci {
> @@ -79,7 +80,7 @@ struct pci {
>   uint32_t pci_addr_reg;
>   uint32_t pci_data_reg;
>  
> - union pci_dev pci_devices[PCI_CONFIG_MAX_DEV];
> + struct pci_dev pci_devices[PCI_CONFIG_MAX_DEV];
>  };
>  
>  void pci_handle_address_reg(struct vm_run_params *);
> 
> 



Re: acpiapplesmc(4)

2020-09-07 Thread Mark Kettenis
> Date: Mon, 7 Sep 2020 19:59:13 +0200
> From: Marcus Glocker 
> 
> On Mon, 7 Sep 2020 19:25:00 +0200 (CEST)
> Mark Kettenis  wrote:
> 
> > > Date: Mon, 7 Sep 2020 12:02:15 -0500
> > > From: joshua stein 
> > > 
> > > On Mon, 07 Sep 2020 at 06:58:01 +0200, Marcus Glocker wrote:  
> > > > This is an initial driver for the Apple System Management
> > > > Controller found in Intel based Apple computers.
> > > > 
> > > > The driver is currently missing support for the Sudden Motion
> > > > Sensor (SMS), light sensor, and keyboard backlight since I don't
> > > > have that hardware available to develop on.
> > > > 
> > > > On my iMac11,2 it can deliver fan and temperatures values:
> > > > 
> > > > hw.sensors.acpiapplesmc0.temp0=24.00 degC (Airflow 1)
> > > > hw.sensors.acpiapplesmc0.temp1=33.00 degC (CPU Core 0)
> > > > hw.sensors.acpiapplesmc0.temp2=36.00 degC (CPU Heatsink)
> > > > hw.sensors.acpiapplesmc0.temp3=40.00 degC (CPU Core 1)
> > > > hw.sensors.acpiapplesmc0.temp4=47.00 degC (GPU)
> > > > hw.sensors.acpiapplesmc0.temp5=45.00 degC (GPU Heatsink)
> > > > hw.sensors.acpiapplesmc0.temp6=59.00 degC (PCH)
> > > > hw.sensors.acpiapplesmc0.temp7=42.00 degC (Memory)
> > > > hw.sensors.acpiapplesmc0.temp8=45.00 degC (Mainboard
> > > > Proximity) hw.sensors.acpiapplesmc0.fan0=998 RPM
> > > > hw.sensors.acpiapplesmc0.fan1=1132 RPM
> > > > hw.sensors.acpiapplesmc0.fan2=1198 RPM
> > > > 
> > > > Feedback, testers, OKs?  
> > > 
> > > Are there machines where asmc(4) will also attach?  
> > 
> > Good point.  My old Macmini1,1 has:
> > 
> > ...
> > "APP0001" at acpi0 not configured
> > ...
> > asmc0 at isa0 port 0x300/32: rev 1.3f503, 137 keys
> > ...
> > 
> > So yes, I'd say there are.
> > 
> > 
> > Having an acpi attachment is probably better than doing isa probes.
> > But we probably should consolidate the drivers.
> 
> D'oh!  I wasn't even aware that we already have an asmc(4) driver in our
> tree.  Shame on me :-|
> 
> Glancing over asmc(4) I don't think there is anything more that my
> driver would support other than attaching over acpi(4).  Would it be
> possible to only write an acpi glue which attaches to asmc(4)?

I think we'd just want to turn it into an acpi(4) driver.  Or maybe
dump it in favour of your driver.



Re: [PATCH] Add common PCIE capability list

2020-09-07 Thread Mark Kettenis
> Date: Mon, 7 Sep 2020 13:33:14 -0500
> From: Jordan Hargrave 
> 
> Attaching the full diff

ok kettenis@

> On Mon, Sep 07, 2020 at 01:09:12PM -0500, Jordan Hargrave wrote:
> > On Thu, Sep 03, 2020 at 08:37:56PM +0200, Mark Kettenis wrote:
> > > > Date: Wed, 2 Sep 2020 15:19:55 +1000
> > > > From: Jonathan Gray 
> > > > 
> > > > On Tue, Sep 01, 2020 at 11:44:03PM -0500, Jordan Hargrave wrote:
> > > > > This patch adds a common function for scanning PCIE Express 
> > > > > Capability list
> > > > > The PCIE Capability list starts at 0x100 in extended PCI 
> > > > > configuration space.
> > > > 
> > > > This seems to only handle extended capabilities?
> > > > Something like pcie_get_ext_capability() would be a better name.
> > > 
> > > I think it should be pci_get_ext_capability() which follows the
> > > pattern set by pci_get_ht_capability().
> > > 
> > > > 
> > > > It is 'PCI Express' not 'PCIExpress'
> > > > 
> > > > 'ofs & 3' test doesn't make sense when PCI_PCIE_ECAP_NEXT() always
> > > > masks those bits.
> > > > 
> > > > > 
> > > > > ---
> > > > >  sys/dev/pci/pci.c| 28 
> > > > >  sys/dev/pci/pcivar.h |  2 ++
> > > > >  2 files changed, 30 insertions(+)
> > > > > 
> > > > > diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
> > > > > index bf75f875e..8f9a5ef7a 100644
> > > > > --- a/sys/dev/pci/pci.c
> > > > > +++ b/sys/dev/pci/pci.c
> > > > > @@ -677,6 +677,34 @@ pci_get_ht_capability(pci_chipset_tag_t pc, 
> > > > > pcitag_t tag, int capid,
> > > > >   return (0);
> > > > >  }
> > > > >  
> > > > > +int
> > > > > +pcie_get_capability(pci_chipset_tag_t pc, pcitag_t tag, int capid,
> > > > > +int *offset, pcireg_t *value)
> > > > > +{
> > > > > + pcireg_t reg;
> > > > > + unsigned int ofs;
> > > > > +
> > > > > + /* Make sure we support PCIExpress device */
> > > 
> > > PCI Express like jsg@ already mentioned.  Add a full stop at the end
> > > of the sentence.
> > > 
> > > > > + if (pci_get_capability(pc, tag, PCI_CAP_PCIEXPRESS, NULL, NULL) 
> > > > > == 0)
> > > > > + return (0);
> > > > > + /* Scan PCIExpress capabilities */
> > > 
> > > Drop this comment and replace with a blank line such that it matches
> > > pci_get_ht_capability().
> > > 
> > > > > + ofs = PCI_PCIE_ECAP;
> > > > > + while (ofs != 0) {
> > > > > + if ((ofs & 3) || (ofs < PCI_PCIE_ECAP))
> > > > > + return (0);
> > > 
> > > Make this check #ifdef DIAGNOSTIC like pci_get_ht_capability() does.
> > > Dropping the (ofs & 3) bit is indeed a good idea.
> > > 
> > > > > + reg = pci_conf_read(pc, tag, ofs);
> > > > > + if (PCI_PCIE_ECAP_ID(reg) == capid) {
> > > > > + if (offset)
> > > > > + *offset = ofs;
> > > > > + if (value)
> > > > > + *value = reg;
> > > > > + return (1);
> > > > > + }
> > > > > + ofs = PCI_PCIE_ECAP_NEXT(reg);
> > > > > + }
> > > 
> > > Blank line here please.
> > > 
> > > > > + return (0);
> > > > > +}
> > > > > +
> > > > >  uint16_t
> > > > >  pci_requester_id(pci_chipset_tag_t pc, pcitag_t tag)
> > > > >  {
> > > > > diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h
> > > > > index bdfe0404f..0376ba992 100644
> > > > > --- a/sys/dev/pci/pcivar.h
> > > > > +++ b/sys/dev/pci/pcivar.h
> > > > > @@ -233,6 +233,8 @@ int   pci_io_find(pci_chipset_tag_t, 
> > > > > pcitag_t, int, bus_addr_t *,
> > > > >  int  pci_mem_find(pci_chipset_tag_t, pcitag_t, int, bus_addr_t *,
> > > > >   bus_size_t *, int *);
> > > > >  
> > > > > +int  pcie_get_capabilit

Re: acpiapplesmc(4)

2020-09-07 Thread Mark Kettenis
> Date: Mon, 7 Sep 2020 12:02:15 -0500
> From: joshua stein 
> 
> On Mon, 07 Sep 2020 at 06:58:01 +0200, Marcus Glocker wrote:
> > This is an initial driver for the Apple System Management Controller
> > found in Intel based Apple computers.
> > 
> > The driver is currently missing support for the Sudden Motion Sensor
> > (SMS), light sensor, and keyboard backlight since I don't have that
> > hardware available to develop on.
> > 
> > On my iMac11,2 it can deliver fan and temperatures values:
> > 
> > hw.sensors.acpiapplesmc0.temp0=24.00 degC (Airflow 1)
> > hw.sensors.acpiapplesmc0.temp1=33.00 degC (CPU Core 0)
> > hw.sensors.acpiapplesmc0.temp2=36.00 degC (CPU Heatsink)
> > hw.sensors.acpiapplesmc0.temp3=40.00 degC (CPU Core 1)
> > hw.sensors.acpiapplesmc0.temp4=47.00 degC (GPU)
> > hw.sensors.acpiapplesmc0.temp5=45.00 degC (GPU Heatsink)
> > hw.sensors.acpiapplesmc0.temp6=59.00 degC (PCH)
> > hw.sensors.acpiapplesmc0.temp7=42.00 degC (Memory)
> > hw.sensors.acpiapplesmc0.temp8=45.00 degC (Mainboard Proximity)
> > hw.sensors.acpiapplesmc0.fan0=998 RPM
> > hw.sensors.acpiapplesmc0.fan1=1132 RPM
> > hw.sensors.acpiapplesmc0.fan2=1198 RPM
> > 
> > Feedback, testers, OKs?
> 
> Are there machines where asmc(4) will also attach?

Good point.  My old Macmini1,1 has:

...
"APP0001" at acpi0 not configured
...
asmc0 at isa0 port 0x300/32: rev 1.3f503, 137 keys
...

So yes, I'd say there are.


Having an acpi attachment is probably better than doing isa probes.
But we probably should consolidate the drivers.



Re: amd64: add tsc_delay(), a TSC-based delay(9) implementation

2020-09-05 Thread Mark Kettenis
> Date: Sat, 5 Sep 2020 09:49:25 -0500
> From: Scott Cheloha 
> Cc: Mark Kettenis , Mike Larkin 
> Content-Type: text/plain; charset=us-ascii
> Content-Disposition: inline
> 
> On Tue, Aug 25, 2020 at 12:22:19PM -0700, Mike Larkin wrote:
> > On Tue, Aug 25, 2020 at 12:12:36PM -0700, Mike Larkin wrote:
> > > On Mon, Aug 24, 2020 at 01:55:45AM +0200, Mark Kettenis wrote:
> > > > > Date: Sun, 23 Aug 2020 18:11:12 -0500
> > > > > From: Scott Cheloha 
> > > > >
> > > > > Hi,
> > > > >
> > > > > Other BSDs use the TSC to implement delay(9) if the TSC is constant
> > > > > and invariant.  Here's a patch to add something similar to our kernel.
> > > >
> > > > If the TSC is fine as a timecounter it should be absolutely fine for
> > > > use as delay().  And we could even use it if the TSC isn't synchronized
> > > > between CPUs.
> > > >
> > > > > This patch (or something equivalent) is a prerequisite to running the
> > > > > lapic timer in oneshot or TSC deadline mode.  Using the lapic timer to
> > > > > implement delay(9) when it isn't running in periodic mode is too
> > > > > complicated.  However, using the i8254 for delay(9) is too slow.  We
> > > > > need an alternative.
> > > >
> > > > Hmm, but what are we going to use on machines where the TSC isn't
> > > > constant/invariant?
> > > >
> > > > In what respect is the i8254 too slow?  Does it take more than a
> > > > microsecond to read it?
> > >
> > > It's 3 outb/inb pairs to ensure you get the reading correct. So that could
> > > be quite a long time (as cheloha@ points out). Also, that's 6 VM exits if
> > > running virtually (I realize that's not the main use case here but just
> > > saying...)
> > >
> > > IIRC the 3 in/out pairs are the latch command followed by reading the 
> > > LSB/MSB
> > > of the counter. It's not MMIO like the HPET or ACPI timer.
> > >
> > > And as cheloha@ also points out, it is highly likely that none of us have 
> > > a
> > > real i8254 anymore, much of this is probably implemented in some EC 
> > > somewhere
> > > and it's unlikely the developer of said EC put a lot of effort into 
> > > optimizing
> > > the implementation of a legacy device like this.
> > >
> > > On the topic of virtualization:
> > >
> > > while (rdtsc() - start < want)
> > >  rdtsc();
> > 
> > I just realized the original diff didn't do two rdtscs. It did a pause 
> > inside the
> > loop. So the effect is not *as* bad as I described but it's still 
> > *somewhat* bad.
> > 
> > PS - pause loop exiting can be enabled to improve performance in this 
> > situation.
> 
> What I'm getting from Mike's and kettenis@'s replies is that this is a
> generally good idea.
> 
> We should add an HPET fallback for the nasty cases where your TSC has
> drift because the i8254 is slooow.  But "hpet_delay()" can wait
> for a later patch because we haven't switched the lapic into oneshot
> mode yet, so lapic_delay() is still useable and fast.
> 
> So, is this ok?  Or do I need to tweak something?  I think I'm setting
> delay_func to tsc_delay under the right circumstances: we know the TSC
> is invariant.
> 
> kettenis@: as I mentioned, we need to do the delay_func pointer
> comparison in lapic_calibrate_timer() to keep from clobbering
> tsc_delay.  We can't compare it with NULL because it is set to
> i8254_delay() by default in amd64/machdep.c.

Given that lapic_delay() will disappear in the future, maybe just
adding a local prototype for tsc_delay() in lapic.c would make sense
instead of adding the header file?

Either way, ok kettenis@

> Index: amd64/lapic.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v
> retrieving revision 1.56
> diff -u -p -r1.56 lapic.c
> --- amd64/lapic.c 3 Sep 2020 21:38:46 -   1.56
> +++ amd64/lapic.c 5 Sep 2020 14:44:08 -
> @@ -41,6 +41,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -584,7 +585,8 @@ skip_calibration:
>* Now that the timer's calibrated, use the apic timer routines
>* for all our timing needs..
>*/
> - delay_func = lapic_delay;
> + if (delay_func != tsc_delay)
> + delay_func = lapic_delay;
&g

Re: timeout(9): add clock-based timeouts (attempt 2)

2020-09-05 Thread Mark Kettenis
> Date: Fri, 4 Sep 2020 17:55:39 -0500
> From: Scott Cheloha 
> 
> On Sat, Jul 25, 2020 at 08:46:08PM -0500, Scott Cheloha wrote:
> > 
> > [...]
> > 
> > I want to add clock-based timeouts to the kernel because tick-based
> > timeouts suffer from a few problems:
> > 
> > [...]
> > 
> > Basically, ticks are a poor approximation for the system clock.  We
> > should use the real thing where possible.
> > 
> > [...]
> > 
> > Thoughts on this approach?  Thoughts on the proposed API?
> 
> 6 week bump.
> 
> Attached is a rebased and streamlined diff.
> 
> Let's try again:
> 
> This patch adds support for timeouts scheduled against the hardware
> timecounter.  I call these "kclock timeouts".  They are distinct from
> the current tick-based timeouts because ticks are "software time", not
> "real time".

So what's the end game here?  Are these kclock-based timeouts going to
replace the tick-based timeouts at some point in the future?  I can
see why you want to have both in parallel for a while, but long-term I
don't think we want to keep both.  We don't really want to do a
wholesale conversion of APIs again I'd say.  So at some point the
existing timeout_add_xxx() calls should be implemented in terms of
"kclock timeouts".

This implementation is still tick driven, so it doesn't really provide
sub-tick resolution.  What does that mean for testing this?  I mean if
we spend a lot of time now to verify that subsystems can tolerate the
more fine-grained timeouts, we need to do that again when you switch from
having a periodic interrupt driving the wheel to having a scheduled
interrupt isn't it?

> For now we have one kclock, KCLOCK_UPTIME, which corresponds to
> nanouptime(9).  In the future I intend to add support for runtime and
> UTC kclocks.

Do we really need that?  I suppose it helps implementing something
like clock_nanosleep() with the TIMER_ABSTIME flag for various
clock_id values?

> Why do we want kclock timeouts at all?
> 
> 1. Kclock timeouts expire at an actual time, not a tick.  They
>have nanosecond resolution and are NTP-sensitive.  Thus, they
>will *never* fire early.

Is there a lot of overhead in these being NTP-sensitive?  I'm asking
because for short timeouts you don't really care about NTP
corrections.

> 2. One upshot of nanosecond resolution is that we don't need to
>"round up" to the next tick when scheduling a timeout to prevent
>early execution.  The extra resolution allows us to reduce
>latency in some contexts.
> 
> 3. Kclock timeouts cover the entire range of the kernel timeline.
>We can remove the "tick loops" like the one in sys_nanosleep().
> 
> 4. Kclock timeouts are scheduled as absolute deadlines.  This makes
>supporting absolute timeouts trivial, which means we can add support
>for clock_nanosleep(2) and the absolute pthread timeouts to the
>kernel.
> 
> Kclock timeouts aren't actually used anywhere yet, so merging this
> patch will not break anything like last time (CC bluhm@).
> 
> In a subsequent diff I will put them to use in tsleep_nsec(9) etc.
> This will enable those interfaces to block for less than a tick, which
> in turn will allow userspace to block for less than a tick in e.g.
> futex(2), and poll(2).  pd@ has verified that this fixes the "time
> problem" in OpenBSD vmm(4) VMs (CC pd@).

Like I said above, running the timeout is still tick-driven isn't it?
This avoids having to wait at least a tick for timeouts that are
shorter than a tick, but it means the timeout can still be extended up
to a full tick.

> You initialize kclock timeouts with timeout_set_kclock().  You
> schedule them with timeout_in_nsec(), a relative timeout interface
> that accepts a count of nanoseconds.  If your timeout is in some
> other unit (seconds, milliseconds, whatever) you must convert it
> to nanoseconds before scheduling.  Something like this will work:
> 
>   timeout_in_nsec(_timeout, SEC_TO_NSEC(1));
> 
> There won't be a flavored API supporting every conceivable time unit.

So this is where I get worried.  What is the game plan?  Slowly
convert everything from timeout_add_xxx() to timeout_in_nsec()?  Or
offer this as a temporary interface for people to test some critical
subsystems after which we dump it and simply re-implement
timeout_add_xxx() as kclock-based timeouts?

> In the future I will expose an absolute timeout interface and a
> periodic timeout rescheduling interface.  We don't need either of
> these interfaces to start, though.

Not sure how useful a periodic timeout rescheduling interface really
is if you have an absolute timeout interface.  And isn't
timeout_at_ts() already implemented in the diff?

> Tick-based timeouts and kclock-based timeouts are *not* compatible.
> You cannot schedule a kclock timeout with timeout_add(9).  You cannot
> schedule a tick-based timeout with timeout_in_nsec(9).  I have added
> KASSERTs to prevent this.
> 
> Scheduling a kclock timeout with timeout_in_nsec() is more costly than
> scheduling a tick-based 

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

2020-09-04 Thread Mark Kettenis
> Date: Fri, 4 Sep 2020 00:50:44 -0500
> From: Jordan Hargrave 

A few hints below...

> > > +
> > > +/* Page Table Entry per domain */
> > > +static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
> > > +
> > > +/* Alias mapping */
> > > +#define SID_INVALID 0x8000L
> > > +static uint32_t sid_flag[65536];
> > 
> > Can we avoid having these large arrays, or at least allocate them
> > dynamically?  That would also avoid the explicit alignment which is
> > somewhat nasty since it affects the entire kernel.
> 
> OK. But the hwdte does need the 2M area to be all contiguous but it is not
> needed for DMAR/Intel.  You *can* have up to 8 different device table entries
> though to split up the area.

The appropriate interface to use in this context is
bus_dmamem_alloc(9).  You can specify alignment, and if you set nsegs
to 1, you will get memory that is physically contiguous.

To map the memory into kernel address space you'll need to create a map
using bus_dmamap_create(9) and map it using bus_dmamem_map(9).  Then
instead of using pmap_extract(9) you use bus_dmamap_load_raw(9) which
then populates the physical addresses.

Many of the drivers written by dlg@ define convenience functions to do
all these steps, although interestingly enough he tends to use
bus_dmamap_load(9) instead of bus_dmamap_load_raw(9) which is
sub-optimal.

> > > +
> > > +struct domain_dev {
> > > + int sid;
> > > + int sec;
> > > + int sub;
> > > + TAILQ_ENTRY(domain_dev) link;
> > > +};
> > > +
> > > +struct domain {
> > > + struct iommu_softc  *iommu;
> > > + int did;
> > > + int gaw;
> > > + struct pte_entry*pte;
> > > + paddr_t ptep;
> > > + struct bus_dma_tag  dmat;
> > > + int flag;
> > > +
> > > + struct mutexexlck;
> > > + charexname[32];
> > > + struct extent   *iovamap;
> > > + TAILQ_HEAD(,domain_dev) devices;
> > > + TAILQ_ENTRY(domain) link;
> > > +};
> > > +
> > > +#define DOM_DEBUG 0x1
> > > +#define DOM_NOMAP 0x2
> > > +
> > > +struct dmar_devlist {
> > > + int type;
> > > + int bus;
> > > + int ndp;
> > > + struct acpidmar_devpath *dp;
> > > + TAILQ_ENTRY(dmar_devlist)   link;
> > > +};
> > > +
> > > +TAILQ_HEAD(devlist_head, dmar_devlist);
> > > +
> > > +struct ivhd_devlist {
> > > + int start_id;
> > > + int end_id;
> > > + int cfg;
> > > + TAILQ_ENTRY(ivhd_devlist)   link;
> > > +};
> > > +
> > > +struct rmrr_softc {
> > > + TAILQ_ENTRY(rmrr_softc) link;
> > > + struct devlist_head devices;
> > > + int segment;
> > > + uint64_tstart;
> > > + uint64_tend;
> > > +};
> > > +
> > > +struct atsr_softc {
> > > + TAILQ_ENTRY(atsr_softc) link;
> > > + struct devlist_head devices;
> > > + int segment;
> > > + int flags;
> > > +};
> > > +
> > > +struct iommu_pic {
> > > + struct pic  pic;
> > > + struct iommu_softc  *iommu;
> > > +};
> > > +
> > > +#define IOMMU_FLAGS_CATCHALL 0x1
> > > +#define IOMMU_FLAGS_BAD  0x2
> > > +#define IOMMU_FLAGS_SUSPEND  0x4
> > > +
> > > +struct iommu_softc {
> > > + TAILQ_ENTRY(iommu_softc)link;
> > > + struct devlist_head devices;
> > > + int id;
> > > + int flags;
> > > + int segment;
> > > +
> > > + struct mutexreg_lock;
> > > +
> > > + bus_space_tag_t iot;
> > > + bus_space_handle_t  ioh;
> > > +
> > > + uint64_tcap;
> > > + uint64_tecap;
> > > + uint32_tgcmd;
> > > +
> > > + int mgaw;
> > > + int agaw;
> > > + int ndoms;
> > > +
> > > + struct root_entry   *root;
> > > + struct context_entry*ctx[256];
> > > +
> > > + void*intr;
> > > + struct iommu_picpic;
> > > + int fedata;
> > > + uint64_tfeaddr;
> > > + uint64_trtaddr;
> > > +
> > > + // Queued Invalidation
> > > + int qi_head;
> > > + int qi_tail;
> > > + paddr_t qip;
> > > + struct qi_entry *qi;
> > > +
> > > + struct domain   *unity;
> > > + TAILQ_HEAD(,domain) domains;
> > > +
> > > + // AMD iommu
> > > + struct ivhd_dte *dte;
> > > + void*cmd_tbl;
> > > + void*evt_tbl;
> > > + paddr_t cmd_tblp;
> > > + paddr_t evt_tblp;
> > > + uint64_twv[128] __aligned(4096);
> > 
> > This wv array isn't used as far as I can tell.
> 
> Ah I was doing some testing on the 

Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi

2020-09-03 Thread Mark Kettenis
> Date: Tue, 1 Sep 2020 17:20:19 -0500
> From: Jordan Hargrave 
> 
> [PATCH] Add IOMMU support for Intel VT-d and AMD Vi
> 
> This hooks each pci device and overrides bus_dmamap_xxx to issue
> remap of DMA requests to virtual DMA space.  It protects devices
> from issuing I/O requests to memory in the system that is outside
> the requested DMA space.
> ---
>  sys/arch/amd64/conf/GENERIC  |1 +
>  sys/arch/amd64/conf/RAMDISK  |1 +
>  sys/arch/amd64/conf/RAMDISK_CD   |1 +
>  sys/arch/amd64/include/pci_machdep.h |3 +-
>  sys/arch/amd64/pci/pci_machdep.c |   15 +-
>  sys/dev/acpi/acpi.c  |5 +
>  sys/dev/acpi/acpidmar.c  | 2988 ++
>  sys/dev/acpi/acpidmar.h  |  534 +
>  sys/dev/acpi/acpireg.h   |   21 +-
>  sys/dev/acpi/amd_iommu.h |  358 +++
>  sys/dev/acpi/files.acpi  |5 +
>  sys/dev/pci/pci.c|   28 +
>  sys/dev/pci/pcivar.h |2 +
>  13 files changed, 3959 insertions(+), 3 deletions(-)
>  create mode 100644 sys/dev/acpi/acpidmar.c
>  create mode 100644 sys/dev/acpi/acpidmar.h
>  create mode 100644 sys/dev/acpi/amd_iommu.h

This needs some further cleanup and style love.  But let's leave that
alone for now.

How much of this code is really shared between DMAR and IVRS?  It
would be nice to split it out between those two if we can avoid code
duplication.

iommu_writel(), iommu_readl(), iommu_writeq() etc., are a bit too
Linuxy; iommu_write_4(), iommu_read_4(), iommu_write_8() would be
better names.

I don't fully grasp why you need acpidmar_intr_establish().  I can see
that MSI interrupts from devices behind the IOMMU need to go through
the IOMMU since they're essentially memory transactions.  But your code
seems to only deal with the IOMMU's error interrupt.  Does the IOMMU
interrupt itself go through the IOMMU as well?

Why do you need to explicitly call acpidmar_sw()?  Naively I would
think that you need to call this fairly late, but you call it before
config_suspend_all(DVACT_SUSPEND) happens.  Is there a reason why this
can't happen as part of normal config_suspend_all(DVACT_SUSPEND)
processing?

I think the way you use pci_probe_device_hook() is fine.

What is the point of having functions that start with an underscore?
Feels like another Linux-ism to me...

A few more random things in the code below...


> diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
> index 2c49f91a1..1eda12bc9 100644
> --- a/sys/arch/amd64/conf/GENERIC
> +++ b/sys/arch/amd64/conf/GENERIC
> @@ -45,6 +45,7 @@ acpibtn*at acpi?
>  acpicpu* at acpi?
>  acpicmos*at acpi?
>  acpidock*at acpi?
> +acpidmar0at acpi?
>  acpiec*  at acpi?
>  acpipci* at acpi?
>  acpiprt* at acpi?
> diff --git a/sys/arch/amd64/conf/RAMDISK b/sys/arch/amd64/conf/RAMDISK
> index 10148add1..7ab48f32e 100644
> --- a/sys/arch/amd64/conf/RAMDISK
> +++ b/sys/arch/amd64/conf/RAMDISK
> @@ -34,6 +34,7 @@ acpipci*at acpi?
>  acpiprt* at acpi?
>  acpimadt0at acpi?
>  #acpitz* at acpi?
> +acpidmar*at acpi? disable
>  
>  mpbios0  at bios0
>  
> diff --git a/sys/arch/amd64/conf/RAMDISK_CD b/sys/arch/amd64/conf/RAMDISK_CD
> index 91022751e..82a24e210 100644
> --- a/sys/arch/amd64/conf/RAMDISK_CD
> +++ b/sys/arch/amd64/conf/RAMDISK_CD
> @@ -48,6 +48,7 @@ sdhc*   at acpi?
>  acpihve* at acpi?
>  chvgpio*at acpi?
>  glkgpio* at acpi?
> +acpidmar*at acpi? disable
>  
>  mpbios0  at bios0
>  
> diff --git a/sys/arch/amd64/include/pci_machdep.h 
> b/sys/arch/amd64/include/pci_machdep.h
> index bc295cc22..c725bdc73 100644
> --- a/sys/arch/amd64/include/pci_machdep.h
> +++ b/sys/arch/amd64/include/pci_machdep.h
> @@ -91,7 +91,8 @@ void
> *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t,
>   int, struct cpu_info *,
>   int (*)(void *), void *, const char *);
>  void pci_intr_disestablish(pci_chipset_tag_t, void *);
> -#define  pci_probe_device_hook(c, a) (0)
> +int  pci_probe_device_hook(pci_chipset_tag_t,
> + struct pci_attach_args *);
>  
>  void pci_dev_postattach(struct device *, struct 
> pci_attach_args *);
>  
> diff --git a/sys/arch/amd64/pci/pci_machdep.c 
> b/sys/arch/amd64/pci/pci_machdep.c
> index cf4e835de..b700946a4 100644
> --- a/sys/arch/amd64/pci/pci_machdep.c
> +++ b/sys/arch/amd64/pci/pci_machdep.c
> @@ -89,6 +89,11 @@
>  #include 
>  #endif
>  
> +#include "acpi.h"
> +#if NACPI > 0
> +#include 
> +#endif
> +
>  /*
>   * Memory Mapped Configuration space access.
>   *
> @@ -797,7 +802,15 @@ pci_init_extents(void)
>   }
>  }
>  
> -#include "acpi.h"
> +int
> +pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
> +{
> +#if NACPI > 0
> + acpidmar_pci_hook(pc, pa);
> +#endif
> + return 0;
> +}
> +
>  #if 

Re: [PATCH] Add common PCIE capability list

2020-09-03 Thread Mark Kettenis
> Date: Wed, 2 Sep 2020 15:19:55 +1000
> From: Jonathan Gray 
> 
> On Tue, Sep 01, 2020 at 11:44:03PM -0500, Jordan Hargrave wrote:
> > This patch adds a common function for scanning PCIE Express Capability list
> > The PCIE Capability list starts at 0x100 in extended PCI configuration 
> > space.
> 
> This seems to only handle extended capabilities?
> Something like pcie_get_ext_capability() would be a better name.

I think it should be pci_get_ext_capability() which follows the
pattern set by pci_get_ht_capability().

> 
> It is 'PCI Express' not 'PCIExpress'
> 
> 'ofs & 3' test doesn't make sense when PCI_PCIE_ECAP_NEXT() always
> masks those bits.
> 
> > 
> > ---
> >  sys/dev/pci/pci.c| 28 
> >  sys/dev/pci/pcivar.h |  2 ++
> >  2 files changed, 30 insertions(+)
> > 
> > diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
> > index bf75f875e..8f9a5ef7a 100644
> > --- a/sys/dev/pci/pci.c
> > +++ b/sys/dev/pci/pci.c
> > @@ -677,6 +677,34 @@ pci_get_ht_capability(pci_chipset_tag_t pc, pcitag_t 
> > tag, int capid,
> > return (0);
> >  }
> >  
> > +int
> > +pcie_get_capability(pci_chipset_tag_t pc, pcitag_t tag, int capid,
> > +int *offset, pcireg_t *value)
> > +{
> > +   pcireg_t reg;
> > +   unsigned int ofs;
> > +
> > +   /* Make sure we support PCIExpress device */

PCI Express like jsg@ already mentioned.  Add a full stop at the end
of the sentence.

> > +   if (pci_get_capability(pc, tag, PCI_CAP_PCIEXPRESS, NULL, NULL) == 0)
> > +   return (0);
> > +   /* Scan PCIExpress capabilities */

Drop this comment and replace with a blank line such that it matches
pci_get_ht_capability().

> > +   ofs = PCI_PCIE_ECAP;
> > +   while (ofs != 0) {
> > +   if ((ofs & 3) || (ofs < PCI_PCIE_ECAP))
> > +   return (0);

Make this check #ifdef DIAGNOSTIC like pci_get_ht_capability() does.
Dropping the (ofs & 3) bit is indeed a good idea.

> > +   reg = pci_conf_read(pc, tag, ofs);
> > +   if (PCI_PCIE_ECAP_ID(reg) == capid) {
> > +   if (offset)
> > +   *offset = ofs;
> > +   if (value)
> > +   *value = reg;
> > +   return (1);
> > +   }
> > +   ofs = PCI_PCIE_ECAP_NEXT(reg);
> > +   }

Blank line here please.

> > +   return (0);
> > +}
> > +
> >  uint16_t
> >  pci_requester_id(pci_chipset_tag_t pc, pcitag_t tag)
> >  {
> > diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h
> > index bdfe0404f..0376ba992 100644
> > --- a/sys/dev/pci/pcivar.h
> > +++ b/sys/dev/pci/pcivar.h
> > @@ -233,6 +233,8 @@ int pci_io_find(pci_chipset_tag_t, pcitag_t, int, 
> > bus_addr_t *,
> >  intpci_mem_find(pci_chipset_tag_t, pcitag_t, int, bus_addr_t *,
> > bus_size_t *, int *);
> >  
> > +intpcie_get_capability(pci_chipset_tag_t, pcitag_t, int,
> > +   int *, pcireg_t *);
> >  intpci_get_capability(pci_chipset_tag_t, pcitag_t, int,
> > int *, pcireg_t *);
> >  intpci_get_ht_capability(pci_chipset_tag_t, pcitag_t, int,
> > -- 
> > 2.26.2
> > 
> > 
> 
> 



Re: amd64: calibrate lapic timer frequency in constant time

2020-09-01 Thread Mark Kettenis
> Date: Tue, 1 Sep 2020 11:05:26 -0500
> From: Scott Cheloha 
> 
> Hi,
> 
> At boot, if we don't know the lapic frequency offhand we compute it by
> waiting for a known clock (the i8254) with a known frequency to cycle
> a few times.
> 
> Currently we cycle hz times.  This doesn't make sense.  There is
> little to no benefit to waiting additional cycles if your kernel is
> compiled with a larger HZ.  Mostly it just makes the calibration take
> longer.
> 
> Consider the common HZ=1000 case.  What is the benefit of looping an
> additional 900 times?  The point of diminishing returns is well under
> 1000 loops.
> 
> 20-50 loops is probably sufficient to limit our error, but I don't
> want to break anything so let's use 100, like we do on default
> kernels.
> 
> ok?

Sorry, but this makes no sense to me.  The current code waits for 1
second regardless of what HZ is.

And I expect that the accuracy of the measurement depends on the total
elapsed time, so I expect less accurate results if you only
wait 100 cycles at HZ=1000 (which is 0.1 second).

> Index: lapic.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v
> retrieving revision 1.55
> diff -u -p -r1.55 lapic.c
> --- lapic.c   3 Aug 2019 14:57:51 -   1.55
> +++ lapic.c   1 Sep 2020 15:58:41 -
> @@ -509,15 +509,15 @@ lapic_calibrate_timer(struct cpu_info *c
>  
>   startapic = lapic_gettick();
>  
> - /* wait the next hz cycles */
> - for (i = 0; i < hz; i++)
> + /* wait a few cycles */
> + for (i = 0; i < 100; i++)
>   wait_next_cycle();
>  
>   endapic = lapic_gettick();
>  
>   intr_restore(s);
>  
> - dtick = hz * rtclock_tval;
> + dtick = 100 * rtclock_tval;
>   dapic = startapic-endapic;
>  
>   /*
> 
> 



Re: amd64: add tsc_delay(), a TSC-based delay(9) implementation

2020-08-25 Thread Mark Kettenis
> Date: Tue, 25 Aug 2020 12:20:22 -0700
> From: Mike Larkin 
> 
> On Mon, Aug 24, 2020 at 12:29:15AM -0500, Scott Cheloha wrote:
> > On Sun, Aug 23, 2020 at 11:45:22PM -0500, Scott Cheloha wrote:
> > >
> > > [...]
> > >
> > > > > This patch (or something equivalent) is a prerequisite to running the
> > > > > lapic timer in oneshot or TSC deadline mode.  Using the lapic timer to
> > > > > implement delay(9) when it isn't running in periodic mode is too
> > > > > complicated.  However, using the i8254 for delay(9) is too slow.  We
> > > > > need an alternative.
> > > >
> > > > Hmm, but what are we going to use on machines where the TSC isn't
> > > > constant/invariant?
> > >
> > > Probably fall back on the i8254?  Unless someone wants to add yet
> > > another delay(9) implementation to amd64...
> > >
> > > > In what respect is the i8254 too slow?  Does it take more than a
> > > > microsecond to read it?
> > >
> > > On my machine, the portion of gettick() *within* the mutex runs in ~19
> > > microseconds.
> > >
> > > That's before any overhead from mtx_enter(9).  I think having multiple
> > > threads in delay(9) should be relatively rare, but you have to keep
> > > that in mind.
> > >
> > > No idea what the overhead would look like on real hardware.  I'm
> > > pretty sure my i8254 is emulated.
> > >
> > > > We could use the HPET I suppose, which may be a bit better.
> > >
> > > It's better.  No mutex.  On my machine it takes ~11 microseconds.
> > > It's a start.
> >
> > Hmmm, now I'm worried I have screwed something up or misconfigured
> > something.
> >
> > It doesn't seem right that it would take 20K cycles to read the HPET
> > on this machine.
> >
> > Am I way off?  Or is 20K actually a reasonable number?
> >
> 
> There have been reports of the HPET being really slow on some machines.
> IIRC this is why we ended up getting a tsc timecounter a number of years
> ago. Someone (reyk@?) found his skylake had a super slow HPET and that
> ended up being part of the impetus to to a tsc timecounter.

I believe that was "discovered" years ago, before Skylake existed.

Anyway, yes, HPET is much slower.  But since both Intel and AMD
seem to mess up the TSC every other CPU generation or so we have to
have a fallback.

> Also, 20k cycles is totally expected if you are on a VM (not sure if
> this is the case).

And in the end, delay(9) should not be used in performance critical
paths, so it doesn't matter all that much.  Your emulated com(4) may
result in some wasted cycles perhaps.  But if you are pushing lots of data
over your virtual serial port, maybe you should rethink what you're
doing.

> > For comparison, lapic_gettick() completes in... 80 nanoseconds (?) on
> > the same machine.  Relevant sysctls:
> >
> 
> LAPIC memory page accesses go to the CPU. It's not always the case that
> the HPET does the same (they may be accessed via PCI). Also, in a VM,
> on new CPUs, LAPIC virtualization can be enabled which means no exits
> for LAPIC accesses. So, yeah, these numbers you are seeing aren't surprising.
> 
> > $ sysctl hw.{model,setperf,perfpolicy} machdep.{tscfreq,invarianttsc}
> > hw.model=Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz
> > hw.setperf=100
> > hw.perfpolicy=high
> > machdep.tscfreq=211200
> > machdep.invarianttsc=1
> >
> > ... if it really takes that long, then "high precision" is a bit of a
> > misnomer.
> >
> 



Re: sensor value last change time not updated?

2020-08-25 Thread Mark Kettenis
> Date: Tue, 25 Aug 2020 21:33:32 +0200
> From: Paul de Weerd 
> 
> Hi Mark,
> 
> Thanks for your reply.
> 
> On Tue, Aug 25, 2020 at 09:27:20PM +0200, Mark Kettenis wrote:
> | > I've dug out my stash of weird usb devices and found another sensor (a
> | > uthum(4), with only temperature support).  I have a few other sensors
> | > in live machines too (acpitz(4), cpu(4), admtemp(4), it(4), maybe some
> | > more) that I could look into.
> | > 
> | > Is there any interest in adding support for setting the tv member for
> | > non-time sensitive sensors?  Or should I drop this quest?
> | 
> | I don't understand the point.  None of the sensor drivers set that
> | member except the timedelta sensors.  I don't think adding code to do
> | so to all sensor drivers makes sense.
> 
> I'm inspecting it to only register "new" samples (even if the value
> itself doesn't change).  My logic is that if the tv member has
> changed, then the sensor value has been updated, so there's new
> "data".  The fact that it's the same temperature / humidity / other
> sensed value can also be interesting.
> 
> But if that doesn't make sense, then I can drop the patches and just
> do periodic sampling at the same interval the kernel uses (which I've
> not found yet, it seems that at least ugold(4) just sends data
> periodically (every ~6 seconds) which the kernel then presents to
> userland through sysctl).

Correct, most sensors are simply sampled periodically.



Re: sensor value last change time not updated?

2020-08-25 Thread Mark Kettenis
> Date: Tue, 25 Aug 2020 21:19:19 +0200
> From: Paul de Weerd 
> 
> Hi all,
> 
> I've dug out my stash of weird usb devices and found another sensor (a
> uthum(4), with only temperature support).  I have a few other sensors
> in live machines too (acpitz(4), cpu(4), admtemp(4), it(4), maybe some
> more) that I could look into.
> 
> Is there any interest in adding support for setting the tv member for
> non-time sensitive sensors?  Or should I drop this quest?

I don't understand the point.  None of the sensor drivers set that
member except the timedelta sensors.  I don't think adding code to do
so to all sensor drivers makes sense.

> uhidev6 at uhub3 port 1 configuration 1 interface 0 "Ten X Technology, Inc. 
> PCsensor Temper" rev 1.10/1.50 addr 10
> uhidev6: iclass 3/1
> uthum0 at uhidev6
> uhidev7 at uhub3 port 1 configuration 1 interface 1 "Ten X Technology, Inc. 
> PCsensor Temper" rev 1.10/1.50 addr 10
> uhidev7: iclass 3/0
> uthum1 at uhidev7
> uthum1: type ds75/12bit (temperature)
> 
> Paul 'WEiRD' de Weerd
> 
> (following diff has been compile tested only so far)
> 
> Index: uthum.c
> ===
> RCS file: /home/OpenBSD/cvs/src/sys/dev/usb/uthum.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 uthum.c
> --- uthum.c   14 Feb 2020 14:55:30 -  1.34
> +++ uthum.c   25 Aug 2020 19:15:45 -
> @@ -662,6 +662,7 @@ uthum_refresh_temperhum(struct uthum_sof
>   sc->sc_sensor[UTHUM_TEMPERHUM_TEMP].sensor.flags &= ~SENSOR_FINVALID;
>   sc->sc_sensor[UTHUM_TEMPERHUM_HUM].sensor.value = rh;
>   sc->sc_sensor[UTHUM_TEMPERHUM_HUM].sensor.flags &= ~SENSOR_FINVALID;
> + microtime(>sc_sensor[UTHUM_TEMPERHUM_HUM].sensor.tv);
>  }
>  
>  void
> @@ -699,6 +700,7 @@ uthum_refresh_temper(struct uthum_softc 
>  
>   sc->sc_sensor[sensor].sensor.value = (temp * 1) + 27315;
>   sc->sc_sensor[sensor].sensor.flags &= ~SENSOR_FINVALID;
> + microtime(>sc_sensor[sensor].sensor.tv);
>  }
>  
>  void
> @@ -733,6 +735,7 @@ uthum_refresh_temperntc(struct uthum_sof
>   temp += sc->sc_sensor[sensor].cal_offset * 1;
>   sc->sc_sensor[sensor].sensor.value = temp;
>   sc->sc_sensor[sensor].sensor.flags &= ~SENSOR_FINVALID;
> + microtime(>sc_sensor[sensor].sensor.tv);
>   }
>  }
>  
> 
> On Sat, Aug 15, 2020 at 10:08:56AM +0200, Paul de Weerd wrote:
> | Thanks Hiltjo, that made me look at ugold.c.
> | 
> | With the below diff, my simple test program shows a value for the tv
> | struct member.
> | 
> | [weerd@pom] $ ./sensor_last_change
> | 1597477798.557355: 28.72
> | 
> | However, given what Hiltjo showed, the tv member seems to only be used
> | for time-sensors, so it may be completely on purpose that other
> | sensors don't expose this.  My rationale for inspecting tv is to
> | ensure that I only take 'unique' samples (so when tv of the new sample
> | differs from the previous one - while the actual value may remain the
> | same).
> | 
> | Is this worth adding?
> | 
> | Cheers,
> | 
> | Paul 'WEiRD' de Weerd
> | 
> | Index: ugold.c
> | ===
> | RCS file: /home/OpenBSD/cvs/src/sys/dev/usb/ugold.c,v
> | retrieving revision 1.14
> | diff -u -p -r1.14 ugold.c
> | --- ugold.c 5 Oct 2017 17:29:00 -   1.14
> | +++ ugold.c 15 Aug 2020 07:32:42 -
> | @@ -270,11 +270,13 @@ ugold_ds75_intr(struct uhidev *addr, voi
> | case 4:
> | temp = ugold_ds75_temp(buf[4], buf[5]);
> | sc->sc_sensor[UGOLD_OUTER].value = temp;
> | +   microtime(>sc_sensor[UGOLD_OUTER].tv);
> | sc->sc_sensor[UGOLD_OUTER].flags &= ~SENSOR_FINVALID;
> | /* FALLTHROUGH */
> | case 2:
> | temp = ugold_ds75_temp(buf[2], buf[3]);
> | sc->sc_sensor[UGOLD_INNER].value = temp;
> | +   microtime(>sc_sensor[UGOLD_INNER].tv);
> | sc->sc_sensor[UGOLD_INNER].flags &= ~SENSOR_FINVALID;
> | break;
> | default:
> | @@ -394,9 +396,11 @@ ugold_si700x_intr(struct uhidev *addr, v
> | sc->sc_hdev.sc_dev.dv_xname, buf[1]);
> | temp = ugold_si700x_temp(sc->sc_type, buf[2], buf[3]);
> | sc->sc_sensor[UGOLD_INNER].value = (temp * 1000) + 27315;
> | +   microtime(>sc_sensor[UGOLD_INNER].tv);
> | sc->sc_sensor[UGOLD_INNER].flags &= ~SENSOR_FINVALID;
> | rhum = ugold_si700x_rhum(sc->sc_type, buf[4], buf[5], temp);
> | sc->sc_sensor[UGOLD_HUM].value = rhum;
> | +   microtime(>sc_sensor[UGOLD_HUM].tv);
> | sc->sc_sensor[UGOLD_HUM].flags &= ~SENSOR_FINVALID;
> | break;
> | default:
> | 
> | On Fri, Aug 14, 2020 at 02:50:39PM +0200, Hiltjo Posthuma wrote:
> | | On Fri, Aug 14, 2020 at 01:46:57PM +0200, Paul de Weerd wrote:

Re: amd64: add tsc_delay(), a TSC-based delay(9) implementation

2020-08-23 Thread Mark Kettenis
> Date: Sun, 23 Aug 2020 18:11:12 -0500
> From: Scott Cheloha 
> 
> Hi,
> 
> Other BSDs use the TSC to implement delay(9) if the TSC is constant
> and invariant.  Here's a patch to add something similar to our kernel.

If the TSC is fine as a timecounter it should be absolutely fine for
use as delay().  And we could even use it if the TSC isn't synchronized
between CPUs.

> 
> This patch (or something equivalent) is a prerequisite to running the
> lapic timer in oneshot or TSC deadline mode.  Using the lapic timer to
> implement delay(9) when it isn't running in periodic mode is too
> complicated.  However, using the i8254 for delay(9) is too slow.  We
> need an alternative.

Hmm, but what are we going to use on machines where the TSC isn't
constant/invariant?

In what respect is the i8254 too slow?  Does it take more than a
microsecond to read it?

We could use the HPET I suppose, which may be a bit better.

> As for the patch, it works for me here, though I'd appreciate a few
> tests.  I admit that comparing function pointers is ugly, but I think
> this is as simple as it can be without implementing some sort of
> framework for "registering" delay(9) implementations and comparing
> them and selecting the "best" implementation.

What about:

if (delay_func == NULL)
delay_func = lapic_delay;

> I'm not sure I put the prototypes in the right headers.  We don't have
> a tsc.h but cpuvar.h looks sorta-correct for tsc_delay().

I think cpuvar.h is fine since it has other TSC-related stuff.
However, with my suggestion above you can drop that.

> FreeBSD's x86/delay.c may be of note:
> 
> https://github.com/freebsd/freebsd/blob/ed96335a07b688c39e16db8856232e5840bc22ac/sys/x86/x86/delay.c
> 
> Thoughts?
> 
> Index: amd64/tsc.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.20
> diff -u -p -r1.20 tsc.c
> --- amd64/tsc.c   23 Aug 2020 21:38:47 -  1.20
> +++ amd64/tsc.c   23 Aug 2020 22:59:25 -
> @@ -26,6 +26,7 @@
>  
>  #include 
>  #include 
> +#include 
>  
>  #define RECALIBRATE_MAX_RETRIES  5
>  #define RECALIBRATE_SMI_THRESHOLD5
> @@ -252,7 +253,8 @@ tsc_timecounter_init(struct cpu_info *ci
>   tsc_timecounter.tc_quality = -1000;
>   tsc_timecounter.tc_user = 0;
>   tsc_is_invariant = 0;
> - }
> + } else
> + delay_func = tsc_delay;
>  
>   tc_init(_timecounter);
>  }
> @@ -342,4 +344,15 @@ tsc_sync_ap(struct cpu_info *ci)
>  {
>   tsc_post_ap(ci);
>   tsc_post_ap(ci);
> +}
> +
> +void
> +tsc_delay(int usecs)
> +{
> + uint64_t interval, start;
> +
> + interval = (uint64_t)usecs * tsc_frequency / 100;
> + start = rdtsc_lfence();
> + while (rdtsc_lfence() - start < interval)
> + CPU_BUSY_CYCLE();
>  }
> Index: amd64/lapic.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v
> retrieving revision 1.55
> diff -u -p -r1.55 lapic.c
> --- amd64/lapic.c 3 Aug 2019 14:57:51 -   1.55
> +++ amd64/lapic.c 23 Aug 2020 22:59:25 -
> @@ -41,6 +41,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -569,7 +570,8 @@ skip_calibration:
>* Now that the timer's calibrated, use the apic timer routines
>* for all our timing needs..
>*/
> - delay_func = lapic_delay;
> + if (delay_func != tsc_delay)
> + delay_func = lapic_delay;
>   initclock_func = lapic_initclocks;
>   }
>  }
> Index: include/cpuvar.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
> retrieving revision 1.10
> diff -u -p -r1.10 cpuvar.h
> --- include/cpuvar.h  9 Aug 2019 15:20:05 -   1.10
> +++ include/cpuvar.h  23 Aug 2020 22:59:25 -
> @@ -102,4 +102,6 @@ void tsc_sync_drift(int64_t);
>  void tsc_sync_bp(struct cpu_info *);
>  void tsc_sync_ap(struct cpu_info *);
>  
> +void tsc_delay(int);
> +
>  #endif
> Index: include/i82489var.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/i82489var.h,v
> retrieving revision 1.18
> diff -u -p -r1.18 i82489var.h
> --- include/i82489var.h   4 Oct 2018 05:00:40 -   1.18
> +++ include/i82489var.h   23 Aug 2020 22:59:26 -
> @@ -128,4 +128,6 @@ extern void lapic_calibrate_timer(struct
>  extern void lapic_startclock(void);
>  extern void lapic_initclocks(void);
>  
> +extern void lapic_delay(int);
> +
>  #endif
> 
> 



Re: timekeep: fixing large skews on amd64 with RDTSCP

2020-08-23 Thread Mark Kettenis
> Date: Sat, 22 Aug 2020 22:05:44 -0500
> From: Scott Cheloha 
> 
> On Tue, Jul 28, 2020 at 10:02:07AM +0300, Paul Irofti wrote:
> > 
> > [...]
> > 
> > Is the issue with LFENCE slowing down the network stack settled? That was
> > the argument against it last time.
> 
> ... a month passes.  Nobody says anything.
> 
> This "it might slow down the network stack" thing keeps coming up, and
> yet nobody can point to (a) who expressed this concern or (b) what the
> penalty is in practice.
> 
> Note that the alternative is "your timecounter might not be monotonic
> between threads".  For me, that's already a dealbreaker.
> 
> But for sake of discussion let's look at some data.  For those of you
> watching from home, please follow along!  I would like to know what
> your results look like.
> 
> To start, here is a microbenchmarking program for clock_gettime(2) on
> amd64.  If you have the userspace timecounter, then
> 
>   clock_gettime(CLOCK_MONOTONIC, ...);
> 
> is a suitable surrogate for nanouptime(9), so this microbenchmark can
> actually tell us about how nanouptime(9) or nanotime(9) would be
> impacted by a comparable change in the kernel timecounter.
> 
> --
> 
> /*
>  * clock_gettime-bench.c
>  */
> #include 
> #include 
> #include 
> #include 
> #include 
> 
> static uint64_t
> rdtsc_lfence(void)
> {
>   uint32_t hi, lo;
> 
>   __asm volatile("lfence; rdtsc; lfence" : "=d" (hi), "=a" (lo));
>   return ((uint64_t)hi << 32) | lo;
> }
> 
> int
> main(int argc, char *argv[])
> {
>   struct timespec now;
>   uint64_t begin, end;
>   long long count, i;
>   const char *errstr;
> 
>   if (argc != 2) {
>   fprintf(stderr, "usage: %s count\n", getprogname());
>   return 1;
>   }
>   count = strtonum(argv[1], 1, LLONG_MAX, );
>   if (errstr != NULL)
>   errx(1, "count is %s: %s", errstr, argv[1]);
> 
>   begin = rdtsc_lfence();
>   for (i = 0; i < count; i++)
>   clock_gettime(CLOCK_MONOTONIC, );
>   end = rdtsc_lfence();
> 
>   printf("%lld\t%llu\n", count, end - begin);
> 
>   return 0;
> }
> 
> --
> 
> Now consider a benchmark of 100K clock_gettime(2) calls against the
> userspace timecounter.
> 
> $ clock_gettime-bench 10
> 10  15703664
> 
> Let's collect 10K of these benchmarks -- our samples -- atop an
> unpatched libc.  Use the shell script below.  Note that we throw out
> samples where we hit a context switch.
> 
> --
> 
> #! /bin/sh
> 
> [ $# -ne 1 ] && exit 1
> RESULTS=$1
> shift
> 
> TIME=$(mktemp) || exit 1
> TMP=$(mktemp) || exit 1
> 
> # Collect 10K samples.
> i=0
> while [ $i -lt 1 ]; do
>   # Call clock_gettime(2) 100K times.
>   /usr/bin/time -l ~/scratch/clock_gettime-bench 10 > $TMP 2> $TIME
>   # Ignore this sample if a context switch occurred.
>   if egrep -q '[1-9][0-9]* +(in)?voluntary context' $TIME; then
>   continue
>   fi
>   cat $TMP >> $RESULTS
>   i=$((i + 1))
> done
> 
> rm $TMP $TIME
> 
> --
> 
> Run it like this:
> 
> $ ksh bench.sh unpatched.out
> 
> That will take ~5-10 minutes at most.
> 
> Next, we'll patch libc to add the LFENCE to the userspace timecounter.
> 
> Index: usertc.c
> ===
> RCS file: /cvs/src/lib/libc/arch/amd64/gen/usertc.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 usertc.c
> --- usertc.c  8 Jul 2020 09:17:48 -   1.2
> +++ usertc.c  22 Aug 2020 22:18:47 -
> @@ -19,10 +19,10 @@
>  #include 
>  
>  static inline u_int
> -rdtsc(void)
> +rdtsc_lfence(void)
>  {
>   uint32_t hi, lo;
> - asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
> + asm volatile("lfence; rdtsc" : "=a"(lo), "=d"(hi));
>   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
>  }
>  
> @@ -31,7 +31,7 @@ tc_get_timecount(struct timekeep *tk, u_
>  {
>   switch (tk->tk_user) {
>   case TC_TSC:
> - *tc = rdtsc();
> + *tc = rdtsc_lfence();
>   return 0;
>   }
>  
> --
> 
> Recompile and reinstall libc.
> 
> Then rerun the benchmark.  Be careful not to overwrite our results
> from the unpatched libc:
> 
> $ ksh bench.sh patched.out
> 
> --
> 
> Alright, now let's compare the results.  I'm not a mathemagician so I
> use ministat and trust it implicitly.  A stat jock could probably do
> this in R or with some python, but I am not that clever, so I will
> stick with ministat.
> 
> There is no ministat port for OpenBSD, but it is pretty trivial to
> clone this github repo and build it on -current:
> 
> https://github.com/thorduri/ministat
> 
> --
> 
> Okay, you have ministat?
> 
> Let's compare the results.  We want the 2nd column in the output
> (-C2).  I'm not interested in the graph (-q), given our population
> size.  We have N=1, so let's push the CI up (-c 99.5).
> 
> $ ~/repo/ministat/ministat -C2 -q -c99.5 unpatched.out patched.out
> x unpatched.out
> + patched.out
> N   Min   

Re: sdmmc(4): add UHS-I support

2020-08-22 Thread Mark Kettenis
> Date: Mon, 17 Aug 2020 12:57:58 +0200 (CEST)
> From: Mark Kettenis 
> 
> > Date: Sun, 16 Aug 2020 19:32:03 +0200 (CEST)
> > From: Mark Kettenis 
> > 
> > The diff below adds support for higher speeds as supported by UHS-I SD
> > cards to the generic sdmmc(4) layer.  The diff in itself does not
> > enable the use of those modes.  That needs separate changes to the
> > SD/MMC controller drivers.  I have such a diff for amlmmc(4) that
> > allows me to use SDR50 mode.
> > 
> > However, to make sure this diff doesn't break existing lower speed
> > modes I'd appreciate tests on a variety of hardware.  So if sdmmc(4)
> > shows up in your dmesg, please test this by exercising your (u)SD or
> > (e)MMC cards.
> > 
> > Thanks,
> > 
> > Mark
> 
> Previous diff didn't build properly on amd64.  Here is a new diff.

I did not receive a lot of test reports, but the diff has been in
snaps for about a week.

ok?


> Index: dev/sdmmc/sdmmc.c
> ===
> RCS file: /cvs/src/sys/dev/sdmmc/sdmmc.c,v
> retrieving revision 1.57
> diff -u -p -r1.57 sdmmc.c
> --- dev/sdmmc/sdmmc.c 15 Aug 2020 13:21:02 -  1.57
> +++ dev/sdmmc/sdmmc.c 17 Aug 2020 10:38:11 -
> @@ -111,6 +111,10 @@ sdmmc_attach(struct device *parent, stru
>   printf(": 1-bit");
>   if (ISSET(saa->caps, SMC_CAPS_SD_HIGHSPEED))
>   printf(", sd high-speed");
> + if (ISSET(saa->caps, SMC_CAPS_UHS_SDR50))
> + printf(", sdr50");
> + if (ISSET(saa->caps, SMC_CAPS_UHS_SDR104))
> + printf(", sdr104");
>   if (ISSET(saa->caps, SMC_CAPS_MMC_HIGHSPEED))
>   printf(", mmc high-speed");
>   if (ISSET(saa->caps, SMC_CAPS_MMC_DDR52))
> Index: dev/sdmmc/sdmmc_mem.c
> ===
> RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_mem.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 sdmmc_mem.c
> --- dev/sdmmc/sdmmc_mem.c 14 Aug 2020 14:49:04 -  1.34
> +++ dev/sdmmc/sdmmc_mem.c 17 Aug 2020 10:38:11 -
> @@ -52,6 +52,7 @@ int sdmmc_mem_decode_scr(struct sdmmc_so
>  int  sdmmc_mem_send_cxd_data(struct sdmmc_softc *, int, void *, size_t);
>  int  sdmmc_mem_set_bus_width(struct sdmmc_function *, int);
>  int  sdmmc_mem_mmc_switch(struct sdmmc_function *, uint8_t, uint8_t, 
> uint8_t);
> +int  sdmmc_mem_signal_voltage(struct sdmmc_softc *, int);
>  
>  int  sdmmc_mem_sd_init(struct sdmmc_softc *, struct sdmmc_function *);
>  int  sdmmc_mem_mmc_init(struct sdmmc_softc *, struct sdmmc_function *);
> @@ -104,12 +105,16 @@ const int sdmmc_mmc_timings[] = {
>  int
>  sdmmc_mem_enable(struct sdmmc_softc *sc)
>  {
> - u_int32_t host_ocr;
> - u_int32_t card_ocr;
> + uint32_t host_ocr;
> + uint32_t card_ocr;
> + uint32_t new_ocr;
> + uint32_t ocr = 0;
> + int error;
>  
>   rw_assert_wrlock(&sc->sc_lock);
>  
>   /* Set host mode to SD "combo" card or SD memory-only. */
> + CLR(sc->sc_flags, SMF_UHS_MODE);
>   SET(sc->sc_flags, SMF_SD_MODE|SMF_MEM_MODE);
>  
>   /* Reset memory (*must* do that before CMD55 or CMD1). */
> @@ -153,14 +158,86 @@ sdmmc_mem_enable(struct sdmmc_softc *sc)
>  
>   host_ocr &= card_ocr; /* only allow the common voltages */
>  
> - if (sdmmc_send_if_cond(sc, card_ocr) == 0)
> - host_ocr |= SD_OCR_SDHC_CAP;
> + if (ISSET(sc->sc_flags, SMF_SD_MODE)) {
> + if (sdmmc_send_if_cond(sc, card_ocr) == 0)
> + SET(ocr, MMC_OCR_HCS);
> +
> + if (sdmmc_chip_host_ocr(sc->sct, sc->sch) & MMC_OCR_S18A)
> + SET(ocr, MMC_OCR_S18A);
> + }
> + host_ocr |= ocr;
>  
>   /* Send the new OCR value until all cards are ready. */
> - if (sdmmc_mem_send_op_cond(sc, host_ocr, NULL) != 0) {
> + if (sdmmc_mem_send_op_cond(sc, host_ocr, &new_ocr) != 0) {
>   DPRINTF(("%s: can't send memory OCR\n", DEVNAME(sc)));
>   return 1;
>   }
> +
> + if (ISSET(sc->sc_flags, SMF_SD_MODE) && ISSET(new_ocr, MMC_OCR_S18A)) {
> + /*
> +  * Card and host support low voltage mode, begin switch
> +  * sequence.
> +  */
> + struct sdmmc_command cmd;
> +
> + memset(&cmd, 0, sizeof(cmd));
> + cmd.c_arg = 0;
> + cmd.c_flags = SCF_CMD_AC | SCF_RSP_R1;
> + cmd.c_opcode = SD_VOLTAGE_SWITCH;
> + DPRINTF(("%s: switching card to 1.8

Re: Push KERNEL_LOCK/UNLOCK in trapsignal()

2020-08-18 Thread Mark Kettenis
> Date: Tue, 18 Aug 2020 11:52:17 +0200
> From: Martin Pieuchot 
> 
> Taken from a larger diff from claudio@, this reduces the lock dances in
> MD code and puts it where we should focus our effort in kern/kern_sig.c.
> 
> ok?

Agreed.  ok kettenis@

> Index: kern/kern_sig.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sig.c,v
> retrieving revision 1.258
> diff -u -p -r1.258 kern_sig.c
> --- kern/kern_sig.c   15 Jun 2020 13:18:33 -  1.258
> +++ kern/kern_sig.c   18 Aug 2020 09:34:11 -
> @@ -802,6 +802,7 @@ trapsignal(struct proc *p, int signum, u
>   struct sigacts *ps = pr->ps_sigacts;
>   int mask;
>  
> + KERNEL_LOCK();
>   switch (signum) {
>   case SIGILL:
>   case SIGBUS:
> @@ -842,6 +843,7 @@ trapsignal(struct proc *p, int signum, u
>   sigexit(p, signum);
>   ptsignal(p, signum, STHREAD);
>   }
> + KERNEL_UNLOCK();
>  }
>  
>  /*
> Index: arch/alpha/alpha/trap.c
> ===
> RCS file: /cvs/src/sys/arch/alpha/alpha/trap.c,v
> retrieving revision 1.88
> diff -u -p -r1.88 trap.c
> --- arch/alpha/alpha/trap.c   6 Sep 2019 12:22:01 -   1.88
> +++ arch/alpha/alpha/trap.c   18 Aug 2020 09:18:54 -
> @@ -488,9 +488,7 @@ do_fault:
>   printtrap(a0, a1, a2, entry, framep, 1, user);
>  #endif
>   sv.sival_ptr = v;
> - KERNEL_LOCK();
>   trapsignal(p, i, ucode, typ, sv);
> - KERNEL_UNLOCK();
>  out:
>   if (user) {
>   /* Do any deferred user pmap operations. */
> Index: arch/amd64/amd64/trap.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v
> retrieving revision 1.79
> diff -u -p -r1.79 trap.c
> --- arch/amd64/amd64/trap.c   21 Jan 2020 03:06:39 -  1.79
> +++ arch/amd64/amd64/trap.c   18 Aug 2020 09:18:54 -
> @@ -391,9 +391,7 @@ usertrap(struct trapframe *frame)
>   }
>  
>   sv.sival_ptr = (void *)frame->tf_rip;
> - KERNEL_LOCK();
>   trapsignal(p, sig, type, code, sv);
> - KERNEL_UNLOCK();
>  
>  out:
>   userret(p);
> Index: arch/arm/arm/fault.c
> ===
> RCS file: /cvs/src/sys/arch/arm/arm/fault.c,v
> retrieving revision 1.39
> diff -u -p -r1.39 fault.c
> --- arch/arm/arm/fault.c  6 Sep 2019 12:22:01 -   1.39
> +++ arch/arm/arm/fault.c  18 Aug 2020 09:18:54 -
> @@ -373,9 +373,7 @@ data_abort_handler(trapframe_t *tf)
>   sd.trap = fsr;
>  do_trapsignal:
>   sv.sival_int = sd.addr;
> - KERNEL_LOCK();
>   trapsignal(p, sd.signo, sd.trap, sd.code, sv);
> - KERNEL_UNLOCK();
>  out:
>   /* If returning to user mode, make sure to invoke userret() */
>   if (user)
> @@ -596,13 +594,9 @@ prefetch_abort_handler(trapframe_t *tf)
>   printf("UVM: pid %d (%s), uid %d killed: "
>   "out of swap\n", p->p_p->ps_pid, p->p_p->ps_comm,
>   p->p_ucred ? (int)p->p_ucred->cr_uid : -1);
> - KERNEL_LOCK();
>   trapsignal(p, SIGKILL, 0, SEGV_MAPERR, sv);
> - KERNEL_UNLOCK();
>   } else {
> - KERNEL_LOCK();
>   trapsignal(p, SIGSEGV, 0, SEGV_MAPERR, sv);
> - KERNEL_UNLOCK();
>   }
>  
>  out:
> Index: arch/arm/arm/undefined.c
> ===
> RCS file: /cvs/src/sys/arch/arm/arm/undefined.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 undefined.c
> --- arch/arm/arm/undefined.c  13 Mar 2019 09:28:21 -  1.13
> +++ arch/arm/arm/undefined.c  18 Aug 2020 09:18:54 -
> @@ -113,9 +113,7 @@ gdb_trapper(u_int addr, u_int insn, stru
>   if (insn == GDB_BREAKPOINT || insn == GDB5_BREAKPOINT) {
>   if (code == FAULT_USER) {
>   sv.sival_int = addr;
> - KERNEL_LOCK();
>   trapsignal(p, SIGTRAP, 0, TRAP_BRKPT, sv);
> - KERNEL_UNLOCK();
>   return 0;
>   }
>   }
> @@ -174,9 +172,7 @@ undefinedinstruction(trapframe_t *frame)
>   if (__predict_false((fault_pc & 3) != 0)) {
>   /* Give the user an illegal instruction signal. */
>   sv.sival_int = (u_int32_t) fault_pc;
> - KERNEL_LOCK();
>   trapsignal(p, SIGILL, 0, ILL_ILLOPC, sv);
> - KERNEL_UNLOCK();
>   userret(p);
>   return;
>   }
> @@ -260,9 +256,7 @@ undefinedinstruction(trapframe_t *frame)
>   }
>  
>   sv.sival_int = frame->tf_pc;
> - KERNEL_LOCK();
>   trapsignal(p, SIGILL, 0, ILL_ILLOPC, sv);
> - KERNEL_UNLOCK();
>   }
>  
>   if ((fault_code & FAULT_USER) == 0)
> Index: arch/arm64/arm64/trap.c
> 

Re: Fewer pool_get() in kqueue_register()

2020-08-18 Thread Mark Kettenis
> Date: Tue, 18 Aug 2020 11:04:47 +0200
> From: Martin Pieuchot 
> 
> Diff below changes the order of operations in kqueue_register() to get
> rid of an unnecessary pool_get().  When an event is already present on
> the list try to acquire it first.  Note that knote_acquire() may sleep
> in which case the list might have changed so the lookup has to always
> begin from the start.
> 
> This will help with lazy removal of knote in poll/select.  In this
> scenario EV_ADD is generally always done with a knote already on the
> list.
> 
> ok?

But pool_get() may sleep as well.  In my experience it is better to do
the resource allocation up front and release afterwards if it turned
out you didn't need the resource.  That's what the current code does.
I don't fully understand how the code works, but your change looks
wrong to me.

> Index: kern/kern_event.c
> ===
> RCS file: /cvs/src/sys/kern/kern_event.c,v
> retrieving revision 1.142
> diff -u -p -r1.142 kern_event.c
> --- kern/kern_event.c 12 Aug 2020 13:49:24 -  1.142
> +++ kern/kern_event.c 18 Aug 2020 08:58:27 -
> @@ -696,7 +696,7 @@ kqueue_register(struct kqueue *kq, struc
>   struct filedesc *fdp = kq->kq_fdp;
>   const struct filterops *fops = NULL;
>   struct file *fp = NULL;
> - struct knote *kn = NULL, *newkn = NULL;
> + struct knote *kn, *newkn = NULL;
>   struct knlist *list = NULL;
>   int s, error = 0;
>  
> @@ -721,22 +721,12 @@ kqueue_register(struct kqueue *kq, struc
>   return (EBADF);
>   }
>  
> - if (kev->flags & EV_ADD)
> - newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);
> -
>  again:
> + kn = NULL;
>   if (fops->f_flags & FILTEROP_ISFD) {
> - if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
> - error = EBADF;
> - goto done;
> - }
> - if (kev->flags & EV_ADD)
> - kqueue_expand_list(kq, kev->ident);
>   if (kev->ident < kq->kq_knlistsize)
>   list = &kq->kq_knlist[kev->ident];
>   } else {
> - if (kev->flags & EV_ADD)
> - kqueue_expand_hash(kq);
>   if (kq->kq_knhashmask != 0) {
>   list = &kq->kq_knhash[
>   KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
> @@ -749,10 +739,6 @@ again:
>   s = splhigh();
>   if (!knote_acquire(kn)) {
>   splx(s);
> - if (fp != NULL) {
> - FRELE(fp, p);
> - fp = NULL;
> - }
>   goto again;
>   }
>   splx(s);
> @@ -760,6 +746,21 @@ again:
>   }
>   }
>   }
> +
> + if (kev->flags & EV_ADD && kn == NULL) {
> + newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);
> + if (fops->f_flags & FILTEROP_ISFD) {
> + if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
> + error = EBADF;
> + goto done;
> + }
> + kqueue_expand_list(kq, kev->ident);
> + } else {
> + kqueue_expand_hash(kq);
> + }
> +
> + }
> +
>   KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);
>  
>   if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
> 
> 



Re: Enable arm64 PAN feature

2020-08-18 Thread Mark Kettenis
> From: Dale Rahn 
> Date: Mon, 17 Aug 2020 18:33:29 -0500
> 
> could we check that there is not an ESR value that indicates PAN violation
> instead of using 'instruction recognition'?

Doesn't exist unfortunately.  You get a protection fault, but you get
the same protection fault if you try to write to a read-only page for
example.

> Seems that it would be more reliable.
> Thanks
> Dale
> 
> On Mon, Aug 17, 2020 at 1:30 AM Jonathan Gray  wrote:
> 
>  On Sat, Aug 15, 2020 at 01:54:34PM +0200, Mark Kettenis wrote:
>  > > Date: Sat, 15 Aug 2020 20:21:09 +1000
>  > > From: Jonathan Gray 
>  > > 
>  > > On Fri, Aug 14, 2020 at 11:06:59PM +0200, Mark Kettenis wrote:
>  > > > > Date: Fri, 14 Aug 2020 14:40:23 +0200 (CEST)
>  > > > > From: Mark Kettenis 
>  > > > > 
>  > > > > I suppose a way to test this properly is to pick a system call
>  and
>  > > > > replace a copyin() with a direct access?  That will succeed
>  without
>  > > > > PAN but should fail with PAN enabled right?
>  > > > 
>  > > > So that does indeed work.  However, the result is a hard hang.  So
>  > > > here is an additional diff that makes sure we panic instead.  The
>  idea
>  > > > is that all user-space access from the kernel should be done by the
>  > > > special unprivileged load/store instructions.
>  > > 
>  > > Would disabling PSTATE.PAN in copyin/copyout along the lines of how
>  > > stac/clac is done for SMAP avoid having to test the instruction type
>  > > entirely?
>  > 
>  > No.  The problem is that we need to catch permission faults caused by
>  > PAN.  But since the faulting address may be valid in the sense that
>  > UVM has a mapping for them that allows the requested access.  In that
>  > case we end up looping since uvm_fault() returns 0 and we retry the
>  > faulting instruction.
>  > 
>  > > Is it faulting on valid copyin/copyout the way you have it at the
>  > > moment?  I don't quite follow what is going on.
>  > 
>  > The copyin/copyout functions use the unprivileged load/store
>  > instructions (LDTR/STTR) which bypass PAN.  But we may still fault
>  > because the memory hasn't been faulted in or because the memory has
>  > been marked read-only for CoW or for MOD/REF emulation.  And some of
>  > those faults manifest themselves as permission faults as well.
>  > 
>  > Currently (without the diff quoted below) those faults will be handled
>  > just fine.  The diff below needs to make sure this continues to be the
>  > case.  The easiest way to do that is to check the instruction.
>  > 
>  > Note that this check is in the "slow path".  In most cases the address
>  > touched by copyin/copyout will already be in the page tables since
>  > userland touched it already.
>  > 
>  > Does that clarify things?
> 
>  Yes, thanks.  I'm fine with both of these diffs going in but still think
>  you should change the mask.
> 
>  > 
>  > 
>  > > > Index: arch/arm64/arm64/trap.c
>  > > > ===
>  > > > RCS file: /cvs/src/sys/arch/arm64/arm64/trap.c,v
>  > > > retrieving revision 1.27
>  > > > diff -u -p -r1.27 trap.c
>  > > > --- arch/arm64/arm64/trap.c   6 Jan 2020 12:37:30 -  
>  1.27
>  > > > +++ arch/arm64/arm64/trap.c   14 Aug 2020 21:05:54 -
>  > > > @@ -65,6 +65,14 @@ void do_el0_error(struct trapframe *);
>  > > >  
>  > > >  void dumpregs(struct trapframe*);
>  > > >  
>  > > > +/* Check whether we're executing an unprivileged load/store
>  instruction. */
>  > > > +static inline int
>  > > > +is_unpriv_ldst(uint64_t elr)
>  > > > +{
>  > > > + uint32_t insn = *(uint32_t *)elr;
>  > > > + return ((insn & 0x3f200c00) == 0x38000800);
>  > > 
>  > > The value of op1 (bit 26) is not used according to the table in the
>  Arm
>  > > ARM.  The mask would be better as 0x3b200c00
>  > > 
>  > > 
>  > > > +}
>  > > > +
>  > > >  static void
>  > > >  data_abort(struct trapframe *frame, uint64_t esr, uint64_t far,
>  > > >  int lower, int exe)
>  > > > @@ -104,8 +112,18 @@ data_abort(struct trapframe *frame, uint
>  > > >   /* The top bit tells us which range to use */
>  > > >   if ((far >> 63) == 1)
>  > > >   map = kernel_map;
>  > > > - else
>  > > > + else {
>  > > > + /*
>  > > > +  * Only allow user-space access using
>  > > > +  * unprivileged load/store instructions.
>  > > > +  */
>  > > > + if (!is_unpriv_ldst(frame->tf_elr)) {
>  > > > + panic("attempt to access user address"
>  > > > +   " 0x%llx from EL1", far);
>  > > > + }
>  > > > +
>  > > >   map = &p->p_vmspace->vm_map;
>  > > > + }
>  > > >   }
>  > > >  
>  > > >   if (exe)
>  > > > 
>  > > > 
>  > > 
>  > 
>  > 



Re: sdmmc(4): add UHS-I support

2020-08-17 Thread Mark Kettenis
> Date: Sun, 16 Aug 2020 19:32:03 +0200 (CEST)
> From: Mark Kettenis 
> 
> The diff below adds support for higher speeds as supported by UHS-I SD
> cards to the generic sdmmc(4) layer.  The diff in itself does not
> enable the use of those modes.  That needs separate changes to the
> SD/MMC controller drivers.  I have such a diff for amlmmc(4) that
> allows me to use SDR50 mode.
> 
> However, to make sure this diff doesn't break existing lower speed
> modes I'd appreciate tests on a variety of hardware.  So if sdmmc(4)
> shows up in your dmesg, please test this by exercising your (u)SD or
> (e)MMC cards.
> 
> Thanks,
> 
> Mark

Previous diff didn't build properly on amd64.  Here is a new diff.

Index: dev/sdmmc/sdmmc.c
===
RCS file: /cvs/src/sys/dev/sdmmc/sdmmc.c,v
retrieving revision 1.57
diff -u -p -r1.57 sdmmc.c
--- dev/sdmmc/sdmmc.c   15 Aug 2020 13:21:02 -  1.57
+++ dev/sdmmc/sdmmc.c   17 Aug 2020 10:38:11 -
@@ -111,6 +111,10 @@ sdmmc_attach(struct device *parent, stru
printf(": 1-bit");
if (ISSET(saa->caps, SMC_CAPS_SD_HIGHSPEED))
printf(", sd high-speed");
+   if (ISSET(saa->caps, SMC_CAPS_UHS_SDR50))
+   printf(", sdr50");
+   if (ISSET(saa->caps, SMC_CAPS_UHS_SDR104))
+   printf(", sdr104");
if (ISSET(saa->caps, SMC_CAPS_MMC_HIGHSPEED))
printf(", mmc high-speed");
if (ISSET(saa->caps, SMC_CAPS_MMC_DDR52))
Index: dev/sdmmc/sdmmc_mem.c
===
RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_mem.c,v
retrieving revision 1.34
diff -u -p -r1.34 sdmmc_mem.c
--- dev/sdmmc/sdmmc_mem.c   14 Aug 2020 14:49:04 -  1.34
+++ dev/sdmmc/sdmmc_mem.c   17 Aug 2020 10:38:11 -
@@ -52,6 +52,7 @@ int   sdmmc_mem_decode_scr(struct sdmmc_so
 intsdmmc_mem_send_cxd_data(struct sdmmc_softc *, int, void *, size_t);
 intsdmmc_mem_set_bus_width(struct sdmmc_function *, int);
 intsdmmc_mem_mmc_switch(struct sdmmc_function *, uint8_t, uint8_t, 
uint8_t);
+intsdmmc_mem_signal_voltage(struct sdmmc_softc *, int);
 
 intsdmmc_mem_sd_init(struct sdmmc_softc *, struct sdmmc_function *);
 intsdmmc_mem_mmc_init(struct sdmmc_softc *, struct sdmmc_function *);
@@ -104,12 +105,16 @@ const int sdmmc_mmc_timings[] = {
 int
 sdmmc_mem_enable(struct sdmmc_softc *sc)
 {
-   u_int32_t host_ocr;
-   u_int32_t card_ocr;
+   uint32_t host_ocr;
+   uint32_t card_ocr;
+   uint32_t new_ocr;
+   uint32_t ocr = 0;
+   int error;
 
rw_assert_wrlock(&sc->sc_lock);
 
/* Set host mode to SD "combo" card or SD memory-only. */
+   CLR(sc->sc_flags, SMF_UHS_MODE);
SET(sc->sc_flags, SMF_SD_MODE|SMF_MEM_MODE);
 
/* Reset memory (*must* do that before CMD55 or CMD1). */
@@ -153,14 +158,86 @@ sdmmc_mem_enable(struct sdmmc_softc *sc)
 
host_ocr &= card_ocr; /* only allow the common voltages */
 
-   if (sdmmc_send_if_cond(sc, card_ocr) == 0)
-   host_ocr |= SD_OCR_SDHC_CAP;
+   if (ISSET(sc->sc_flags, SMF_SD_MODE)) {
+   if (sdmmc_send_if_cond(sc, card_ocr) == 0)
+   SET(ocr, MMC_OCR_HCS);
+
+   if (sdmmc_chip_host_ocr(sc->sct, sc->sch) & MMC_OCR_S18A)
+   SET(ocr, MMC_OCR_S18A);
+   }
+   host_ocr |= ocr;
 
/* Send the new OCR value until all cards are ready. */
-   if (sdmmc_mem_send_op_cond(sc, host_ocr, NULL) != 0) {
+   if (sdmmc_mem_send_op_cond(sc, host_ocr, &new_ocr) != 0) {
DPRINTF(("%s: can't send memory OCR\n", DEVNAME(sc)));
return 1;
}
+
+   if (ISSET(sc->sc_flags, SMF_SD_MODE) && ISSET(new_ocr, MMC_OCR_S18A)) {
+   /*
+* Card and host support low voltage mode, begin switch
+* sequence.
+*/
+   struct sdmmc_command cmd;
+
+   memset(&cmd, 0, sizeof(cmd));
+   cmd.c_arg = 0;
+   cmd.c_flags = SCF_CMD_AC | SCF_RSP_R1;
+   cmd.c_opcode = SD_VOLTAGE_SWITCH;
+   DPRINTF(("%s: switching card to 1.8V\n", DEVNAME(sc)));
+   error = sdmmc_mmc_command(sc, &cmd);
+   if (error) {
+   DPRINTF(("%s: voltage switch command failed\n",
+   SDMMCDEVNAME(sc)));
+   return error;
+   }
+
+   error = sdmmc_mem_signal_voltage(sc, SDMMC_SIGNAL_VOLTAGE_180);
+   if (error)
+   return error;
+
+   SET(sc->sc_flags, SMF_UHS_MODE);
+   }
+
+   return 0;
+}
+
+int
+sdmmc_mem_signal_voltage(struct sdmmc_softc

sdmmc(4): add UHS-I support

2020-08-16 Thread Mark Kettenis
The diff below adds support for higher speeds as supported by UHS-I SD
cards to the generic sdmmc(4) layer.  The diff in itself does not
enable the use of those modes.  That needs separate changes to the
SD/MMC controller drivers.  I have such a diff for amlmmc(4) that
allows me to use SDR50 mode.

However, to make sure this diff doesn't break existing lower speed
modes I'd appreciate tests on a variety of hardware.  So if sdmmc(4)
shows up in your dmesg, please test this by exercising your (u)SD or
(e)MMC cards.

Thanks,

Mark


Index: dev/sdmmc/sdmmc.c
===
RCS file: /cvs/src/sys/dev/sdmmc/sdmmc.c,v
retrieving revision 1.57
diff -u -p -r1.57 sdmmc.c
--- dev/sdmmc/sdmmc.c   15 Aug 2020 13:21:02 -  1.57
+++ dev/sdmmc/sdmmc.c   16 Aug 2020 17:15:55 -
@@ -111,6 +111,10 @@ sdmmc_attach(struct device *parent, stru
printf(": 1-bit");
if (ISSET(saa->caps, SMC_CAPS_SD_HIGHSPEED))
printf(", sd high-speed");
+   if (ISSET(saa->caps, SMC_CAPS_UHS_SDR50))
+   printf(", sdr50");
+   if (ISSET(saa->caps, SMC_CAPS_UHS_SDR104))
+   printf(", sdr104");
if (ISSET(saa->caps, SMC_CAPS_MMC_HIGHSPEED))
printf(", mmc high-speed");
if (ISSET(saa->caps, SMC_CAPS_MMC_DDR52))
Index: dev/sdmmc/sdmmc_mem.c
===
RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_mem.c,v
retrieving revision 1.34
diff -u -p -r1.34 sdmmc_mem.c
--- dev/sdmmc/sdmmc_mem.c   14 Aug 2020 14:49:04 -  1.34
+++ dev/sdmmc/sdmmc_mem.c   16 Aug 2020 17:15:55 -
@@ -52,6 +52,7 @@ int   sdmmc_mem_decode_scr(struct sdmmc_so
 intsdmmc_mem_send_cxd_data(struct sdmmc_softc *, int, void *, size_t);
 intsdmmc_mem_set_bus_width(struct sdmmc_function *, int);
 intsdmmc_mem_mmc_switch(struct sdmmc_function *, uint8_t, uint8_t, 
uint8_t);
+intsdmmc_mem_signal_voltage(struct sdmmc_softc *, int);
 
 intsdmmc_mem_sd_init(struct sdmmc_softc *, struct sdmmc_function *);
 intsdmmc_mem_mmc_init(struct sdmmc_softc *, struct sdmmc_function *);
@@ -104,12 +105,16 @@ const int sdmmc_mmc_timings[] = {
 int
 sdmmc_mem_enable(struct sdmmc_softc *sc)
 {
-   u_int32_t host_ocr;
-   u_int32_t card_ocr;
+   uint32_t host_ocr;
+   uint32_t card_ocr;
+   uint32_t new_ocr;
+   uint32_t ocr = 0;
+   int error;
 
rw_assert_wrlock(&sc->sc_lock);
 
/* Set host mode to SD "combo" card or SD memory-only. */
+   CLR(sc->sc_flags, SMF_UHS_MODE);
SET(sc->sc_flags, SMF_SD_MODE|SMF_MEM_MODE);
 
/* Reset memory (*must* do that before CMD55 or CMD1). */
@@ -153,14 +158,86 @@ sdmmc_mem_enable(struct sdmmc_softc *sc)
 
host_ocr &= card_ocr; /* only allow the common voltages */
 
-   if (sdmmc_send_if_cond(sc, card_ocr) == 0)
-   host_ocr |= SD_OCR_SDHC_CAP;
+   if (ISSET(sc->sc_flags, SMF_SD_MODE)) {
+   if (sdmmc_send_if_cond(sc, card_ocr) == 0)
+   SET(ocr, MMC_OCR_HCS);
+
+   if (sdmmc_chip_host_ocr(sc->sct, sc->sch) & MMC_OCR_S18A)
+   SET(ocr, MMC_OCR_S18A);
+   }
+   host_ocr |= ocr;
 
/* Send the new OCR value until all cards are ready. */
-   if (sdmmc_mem_send_op_cond(sc, host_ocr, NULL) != 0) {
+   if (sdmmc_mem_send_op_cond(sc, host_ocr, &new_ocr) != 0) {
DPRINTF(("%s: can't send memory OCR\n", DEVNAME(sc)));
return 1;
}
+
+   if (ISSET(sc->sc_flags, SMF_SD_MODE) && ISSET(new_ocr, MMC_OCR_S18A)) {
+   /*
+* Card and host support low voltage mode, begin switch
+* sequence.
+*/
+   struct sdmmc_command cmd;
+
+   memset(&cmd, 0, sizeof(cmd));
+   cmd.c_arg = 0;
+   cmd.c_flags = SCF_CMD_AC | SCF_RSP_R1;
+   cmd.c_opcode = SD_VOLTAGE_SWITCH;
+   DPRINTF(("%s: switching card to 1.8V\n", DEVNAME(sc)));
+   error = sdmmc_mmc_command(sc, &cmd);
+   if (error) {
+   DPRINTF(("%s: voltage switch command failed\n",
+   SDMMCDEVNAME(sc)));
+   return error;
+   }
+
+   error = sdmmc_mem_signal_voltage(sc, SDMMC_SIGNAL_VOLTAGE_180);
+   if (error)
+   return error;
+
+   SET(sc->sc_flags, SMF_UHS_MODE);
+   }
+
+   return 0;
+}
+
+int
+sdmmc_mem_signal_voltage(struct sdmmc_softc *sc, int signal_voltage)
+{
+   int error;
+
+   /*
+* Stop the clock
+*/
+   error = sdmmc_chip_bus_clock(sc->sct, sc->sch, 0, SDMMC_TIMING_LEGACY);
+   if (error)
+   return error;
+
+   delay(1000);
+
+   /*
+* Card switch command was successful, update host controller
+* signal voltage setting.
+

cosmetic sdmmc(4) diff

2020-08-15 Thread Mark Kettenis
This diff makes sdmmc(4) print ddr52 and hs200 capabilities, making it
possible to see whether a controller supports these high-speed eMMC
modes.

ok?


Index: dev/sdmmc/sdmmc.c
===
RCS file: /cvs/src/sys/dev/sdmmc/sdmmc.c,v
retrieving revision 1.56
diff -u -p -r1.56 sdmmc.c
--- dev/sdmmc/sdmmc.c   24 Jul 2020 12:43:32 -  1.56
+++ dev/sdmmc/sdmmc.c   15 Aug 2020 12:53:32 -
@@ -113,6 +113,10 @@ sdmmc_attach(struct device *parent, stru
printf(", sd high-speed");
if (ISSET(saa->caps, SMC_CAPS_MMC_HIGHSPEED))
printf(", mmc high-speed");
+   if (ISSET(saa->caps, SMC_CAPS_MMC_DDR52))
+   printf(", ddr52");
+   if (ISSET(saa->caps, SMC_CAPS_MMC_HS200))
+   printf(", hs200");
if (ISSET(saa->caps, SMC_CAPS_DMA))
printf(", dma");
printf("\n");



Re: Enable arm64 PAN feature

2020-08-15 Thread Mark Kettenis
> Date: Sat, 15 Aug 2020 20:21:09 +1000
> From: Jonathan Gray 
> 
> On Fri, Aug 14, 2020 at 11:06:59PM +0200, Mark Kettenis wrote:
> > > Date: Fri, 14 Aug 2020 14:40:23 +0200 (CEST)
> > > From: Mark Kettenis 
> > > 
> > > I suppose a way to test this properly is to pick a system call and
> > > replace a copyin() with a direct access?  That will succeed without
> > > PAN but should fail with PAN enabled right?
> > 
> > So that does indeed work.  However, the result is a hard hang.  So
> > here is an additional diff that makes sure we panic instead.  The idea
> > is that all user-space access from the kernel should be done by the
> > special unprivileged load/store instructions.
> 
> Would disabling PSTATE.PAN in copyin/copyout along the lines of how
> stac/clac is done for SMAP avoid having to test the instruction type
> entirely?

No.  The problem is that we need to catch permission faults caused by
PAN.  The faulting address may still be valid in the sense that UVM
has a mapping for it that allows the requested access.  In that case
we end up looping since uvm_fault() returns 0 and we retry the
faulting instruction.

> Is it faulting on valid copyin/copyout the way you have it at the
> moment?  I don't quite follow what is going on.

The copyin/copyout functions use the unprivileged load/store
instructions (LDTR/STTR) which bypass PAN.  But we may still fault
because the memory hasn't been faulted in or because the memory has
been marked read-only for CoW or for MOD/REF emulation.  And some of
those faults manifest themselves as permission faults as well.

Currently (without the diff quoted below) those faults will be handled
just fine.  The diff below needs to make sure this continues to be the
case.  The easiest way to do that is to check the instruction.

Note that this check is in the "slow path".  In most cases the address
touched by copyin/copyout will already be in the page tables since
userland touched it already.

Does that clarify things?


> > Index: arch/arm64/arm64/trap.c
> > ===
> > RCS file: /cvs/src/sys/arch/arm64/arm64/trap.c,v
> > retrieving revision 1.27
> > diff -u -p -r1.27 trap.c
> > --- arch/arm64/arm64/trap.c 6 Jan 2020 12:37:30 -   1.27
> > +++ arch/arm64/arm64/trap.c 14 Aug 2020 21:05:54 -
> > @@ -65,6 +65,14 @@ void do_el0_error(struct trapframe *);
> >  
> >  void dumpregs(struct trapframe*);
> >  
> > +/* Check whether we're executing an unprivileged load/store instruction. */
> > +static inline int
> > +is_unpriv_ldst(uint64_t elr)
> > +{
> > +   uint32_t insn = *(uint32_t *)elr;
> > +   return ((insn & 0x3f200c00) == 0x38000800);
> 
> The value of op1 (bit 26) is not used according to the table in the Arm
> ARM.  The mask would be better as 0x3b200c00
> 
> 
> > +}
> > +
> >  static void
> >  data_abort(struct trapframe *frame, uint64_t esr, uint64_t far,
> >  int lower, int exe)
> > @@ -104,8 +112,18 @@ data_abort(struct trapframe *frame, uint
> > /* The top bit tells us which range to use */
> > if ((far >> 63) == 1)
> > map = kernel_map;
> > -   else
> > +   else {
> > +   /*
> > +* Only allow user-space access using
> > +* unprivileged load/store instructions.
> > +*/
> > +   if (!is_unpriv_ldst(frame->tf_elr)) {
> > +   panic("attempt to access user address"
> > + " 0x%llx from EL1", far);
> > +   }
> > +
> > map = &p->p_vmspace->vm_map;
> > +   }
> > }
> >  
> > if (exe)
> > 
> > 
> 



Re: Enable arm64 PAN feature

2020-08-14 Thread Mark Kettenis
> Date: Fri, 14 Aug 2020 14:40:23 +0200 (CEST)
> From: Mark Kettenis 
> 
> I suppose a way to test this properly is to pick a system call and
> replace a copyin() with a direct access?  That will succeed without
> PAN but should fail with PAN enabled right?

So that does indeed work.  However, the result is a hard hang.  So
here is an additional diff that makes sure we panic instead.  The idea
is that all user-space access from the kernel should be done by the
special unprivileged load/store instructions.

Index: arch/arm64/arm64/trap.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/trap.c,v
retrieving revision 1.27
diff -u -p -r1.27 trap.c
--- arch/arm64/arm64/trap.c 6 Jan 2020 12:37:30 -   1.27
+++ arch/arm64/arm64/trap.c 14 Aug 2020 21:05:54 -
@@ -65,6 +65,14 @@ void do_el0_error(struct trapframe *);
 
 void dumpregs(struct trapframe*);
 
+/* Check whether we're executing an unprivileged load/store instruction. */
+static inline int
+is_unpriv_ldst(uint64_t elr)
+{
+   uint32_t insn = *(uint32_t *)elr;
+   return ((insn & 0x3f200c00) == 0x38000800);
+}
+
 static void
 data_abort(struct trapframe *frame, uint64_t esr, uint64_t far,
 int lower, int exe)
@@ -104,8 +112,18 @@ data_abort(struct trapframe *frame, uint
/* The top bit tells us which range to use */
if ((far >> 63) == 1)
map = kernel_map;
-   else
+   else {
+   /*
+* Only allow user-space access using
+* unprivileged load/store instructions.
+*/
+   if (!is_unpriv_ldst(frame->tf_elr)) {
+   panic("attempt to access user address"
+ " 0x%llx from EL1", far);
+   }
+
map = &p->p_vmspace->vm_map;
+   }
}
 
if (exe)



Re: Enable arm64 PAN feature

2020-08-14 Thread Mark Kettenis
> Date: Fri, 14 Aug 2020 12:29:51 +1000
> From: Jonathan Gray 
> 
> On Thu, Aug 13, 2020 at 09:17:41PM +0200, Mark Kettenis wrote:
> > ARMv8.1 introduced PAN (Privileged Access Never) which prevents the
> > kernel from accessing userland data.  This can be bypassed by using
> > special instructions which we already use in copyin(9) and friends.
> > So we can simply turn this feature on if the CPU supports it.
> > 
> > Tested on an Odroid-C4 which has Cortex-A55 cores that have PAN
> > support.
> > 
> > ok?
> 
> This should be changing PSTATE.PAN as well.  Can you force an
> access of this type to be sure the permission fault occurs?
> 
> From the Arm ARM:
> 
> "When the value of PSTATE.PAN is 1, any privileged data access from
> EL1, or EL2 when HCR_EL2.E2H is 1, to a virtual memory address that
> is accessible at EL0, generates a Permission fault.
> 
> When the value of PSTATE.PAN is 0, the translation system is the
> same as in Armv8.0.
> 
> When ARMv8.1-PAN is implemented, the SPSR_EL1.PAN, SPSR_EL2.PAN, and
> SPSR_EL3.PAN bits are used for exception returns, and the DSPSR_EL0
> register is used for entry to or exit from Debug state.
> 
> When ARMv8.1-PAN is implemented, the SCTLR_EL1.SPAN and SCTLR_EL2.SPAN
> bits are used to control whether the PAN bit is set on an exception
> to EL1 or EL2."

By clearing SCTLR_EL1.SPAN, PSTATE.PAN will be set on an exception to
EL1.  Yes, this does mean that PSTATE.PAN won't be set until a CPU
returns to userland for the first time.  But from then on PSTATE.PAN
will be set.

I suppose a way to test this properly is to pick a system call and
replace a copyin() with a direct access?  That will succeed without
PAN but should fail with PAN enabled right?

> > Index: arch/arm64/arm64/cpu.c
> > ===
> > RCS file: /cvs/src/sys/arch/arm64/arm64/cpu.c,v
> > retrieving revision 1.38
> > diff -u -p -r1.38 cpu.c
> > --- arch/arm64/arm64/cpu.c  4 Jun 2020 21:18:16 -   1.38
> > +++ arch/arm64/arm64/cpu.c  13 Aug 2020 19:12:30 -
> > @@ -321,6 +321,7 @@ cpu_attach(struct device *parent, struct
> > struct fdt_attach_args *faa = aux;
> > struct cpu_info *ci;
> > uint64_t mpidr = READ_SPECIALREG(mpidr_el1);
> > +   uint64_t id_aa64mmfr1, sctlr;
> > uint32_t opp;
> >  
> > KASSERT(faa->fa_nreg > 0);
> > @@ -393,6 +394,14 @@ cpu_attach(struct device *parent, struct
> > cpu_cpuspeed = cpu_clockspeed;
> > }
> >  
> > +   /* Enable PAN. */
> > +   id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
> > +   if (ID_AA64MMFR1_PAN(id_aa64mmfr1) != ID_AA64MMFR1_PAN_NONE) {
> > +   sctlr = READ_SPECIALREG(sctlr_el1);
> > +   sctlr &= ~SCTLR_SPAN;
> > +   WRITE_SPECIALREG(sctlr_el1, sctlr);
> > +   }
> > +
> > /* Initialize debug registers. */
> > WRITE_SPECIALREG(mdscr_el1, DBG_MDSCR_TDCC);
> > WRITE_SPECIALREG(oslar_el1, 0);
> > @@ -522,6 +531,7 @@ cpu_boot_secondary(struct cpu_info *ci)
> >  void
> >  cpu_start_secondary(struct cpu_info *ci)
> >  {
> > +   uint64_t id_aa64mmfr1, sctlr;
> > uint64_t tcr;
> > int s;
> >  
> > @@ -543,6 +553,14 @@ cpu_start_secondary(struct cpu_info *ci)
> > tcr |= TCR_T0SZ(64 - USER_SPACE_BITS);
> > tcr |= TCR_A1;
> > WRITE_SPECIALREG(tcr_el1, tcr);
> > +
> > +   /* Enable PAN. */
> > +   id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
> > +   if (ID_AA64MMFR1_PAN(id_aa64mmfr1) != ID_AA64MMFR1_PAN_NONE) {
> > +   sctlr = READ_SPECIALREG(sctlr_el1);
> > +   sctlr &= ~SCTLR_SPAN;
> > +   WRITE_SPECIALREG(sctlr_el1, sctlr);
> > +   }
> >  
> > /* Initialize debug registers. */
> > WRITE_SPECIALREG(mdscr_el1, DBG_MDSCR_TDCC);
> > Index: arch/arm64/include/armreg.h
> > ===
> > RCS file: /cvs/src/sys/arch/arm64/include/armreg.h,v
> > retrieving revision 1.11
> > diff -u -p -r1.11 armreg.h
> > --- arch/arm64/include/armreg.h 5 Jun 2020 22:14:25 -   1.11
> > +++ arch/arm64/include/armreg.h 13 Aug 2020 19:12:30 -
> > @@ -451,6 +451,7 @@
> >  #defineSCTLR_nTWI  0x0001
> >  #defineSCTLR_nTWE  0x0004
> >  #defineSCTLR_WXN   0x0008
> > +#defineSCTLR_SPAN  0x0080
> >  #defineSCTLR_EOE   0x0100
> >  #defineSCTLR_EE0x0200
> >  #defineSCTLR_UCI   0x0400
> > @@ -478,6 +479,7 @@
> >  #definePSR_D   0x0200
> >  #definePSR_IL  0x0010
> >  #definePSR_SS  0x0020
> > +#definePSR_PAN 0x0040
> >  #definePSR_V   0x1000
> >  #definePSR_C   0x2000
> >  #definePSR_Z   0x4000
> > 
> > 
> 



softintr.h comment tweak

2020-08-14 Thread Mark Kettenis
Miod noticed that the powerpc64 version talked about AArch64.  I don't
think the "for all XXX platforms" makes sense so simply drop it from
all three versions of this header.

ok?


Index: arch/arm/include/softintr.h
===
RCS file: /cvs/src/sys/arch/arm/include/softintr.h,v
retrieving revision 1.5
diff -u -p -r1.5 softintr.h
--- arch/arm/include/softintr.h 21 Dec 2010 14:56:23 -  1.5
+++ arch/arm/include/softintr.h 14 Aug 2020 12:30:10 -
@@ -44,7 +44,7 @@
 #include 
 
 /*
- * Generic software interrupt support for all ARM platforms.
+ * Generic software interrupt support.
  *
  * To use this code, include  from your platform's
  * .
Index: arch/arm64/include/softintr.h
===
RCS file: /cvs/src/sys/arch/arm64/include/softintr.h,v
retrieving revision 1.1
diff -u -p -r1.1 softintr.h
--- arch/arm64/include/softintr.h   17 Dec 2016 23:38:33 -  1.1
+++ arch/arm64/include/softintr.h   14 Aug 2020 12:30:10 -
@@ -39,7 +39,7 @@
 #include 
 
 /*
- * Generic software interrupt support for all AArch64 platforms.
+ * Generic software interrupt support.
  *
  * To use this code, include  from your platform's
  * .
Index: arch/powerpc64/include/softintr.h
===
RCS file: /cvs/src/sys/arch/powerpc64/include/softintr.h,v
retrieving revision 1.1
diff -u -p -r1.1 softintr.h
--- arch/powerpc64/include/softintr.h   16 May 2020 17:11:14 -  1.1
+++ arch/powerpc64/include/softintr.h   14 Aug 2020 12:30:10 -
@@ -38,7 +38,7 @@
 #include 
 
 /*
- * Generic software interrupt support for all AArch64 platforms.
+ * Generic software interrupt support.
  *
  * To use this code, include  from your platform's
  * .



Re: Enable arm64 PAN feature

2020-08-13 Thread Mark Kettenis
> Date: Thu, 13 Aug 2020 22:52:57 +0200
> From: Patrick Wildt 
> 
> On Thu, Aug 13, 2020 at 09:17:41PM +0200, Mark Kettenis wrote:
> > ARMv8.1 introduced PAN (Privileged Access Never) which prevents the
> > kernel from accessing userland data.  This can be bypassed by using
> > special instructions which we already use in copyin(9) and friends.
> > So we can simply turn this feature on if the CPU supports it.
> > 
> > Tested on an Odroid-C4 which has Cortex-A55 cores that have PAN
> > support.
> > 
> > ok?
> > 
> 
> So if I read this right, the SPAN bit makes that an exception to
> kernel/hypervisor mode sets the PAN bit in the PSTATE.  Exception
> also means "interrupt", or is this only syscall?  I think intrs
> are also exceptions...
> 
> Essentially, every time we switch to EL1/EL2 PAN will be enabled?

Yes!

> Sounds good to me, ok patrick@

Thanks.  I'll wait a bit to give drahn@ and jsg@ the opportunity to
chime in.

> > Index: arch/arm64/arm64/cpu.c
> > ===
> > RCS file: /cvs/src/sys/arch/arm64/arm64/cpu.c,v
> > retrieving revision 1.38
> > diff -u -p -r1.38 cpu.c
> > --- arch/arm64/arm64/cpu.c  4 Jun 2020 21:18:16 -   1.38
> > +++ arch/arm64/arm64/cpu.c  13 Aug 2020 19:12:30 -
> > @@ -321,6 +321,7 @@ cpu_attach(struct device *parent, struct
> > struct fdt_attach_args *faa = aux;
> > struct cpu_info *ci;
> > uint64_t mpidr = READ_SPECIALREG(mpidr_el1);
> > +   uint64_t id_aa64mmfr1, sctlr;
> > uint32_t opp;
> >  
> > KASSERT(faa->fa_nreg > 0);
> > @@ -393,6 +394,14 @@ cpu_attach(struct device *parent, struct
> > cpu_cpuspeed = cpu_clockspeed;
> > }
> >  
> > +   /* Enable PAN. */
> > +   id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
> > +   if (ID_AA64MMFR1_PAN(id_aa64mmfr1) != ID_AA64MMFR1_PAN_NONE) {
> > +   sctlr = READ_SPECIALREG(sctlr_el1);
> > +   sctlr &= ~SCTLR_SPAN;
> > +   WRITE_SPECIALREG(sctlr_el1, sctlr);
> > +   }
> > +
> > /* Initialize debug registers. */
> > WRITE_SPECIALREG(mdscr_el1, DBG_MDSCR_TDCC);
> > WRITE_SPECIALREG(oslar_el1, 0);
> > @@ -522,6 +531,7 @@ cpu_boot_secondary(struct cpu_info *ci)
> >  void
> >  cpu_start_secondary(struct cpu_info *ci)
> >  {
> > +   uint64_t id_aa64mmfr1, sctlr;
> > uint64_t tcr;
> > int s;
> >  
> > @@ -543,6 +553,14 @@ cpu_start_secondary(struct cpu_info *ci)
> > tcr |= TCR_T0SZ(64 - USER_SPACE_BITS);
> > tcr |= TCR_A1;
> > WRITE_SPECIALREG(tcr_el1, tcr);
> > +
> > +   /* Enable PAN. */
> > +   id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
> > +   if (ID_AA64MMFR1_PAN(id_aa64mmfr1) != ID_AA64MMFR1_PAN_NONE) {
> > +   sctlr = READ_SPECIALREG(sctlr_el1);
> > +   sctlr &= ~SCTLR_SPAN;
> > +   WRITE_SPECIALREG(sctlr_el1, sctlr);
> > +   }
> >  
> > /* Initialize debug registers. */
> > WRITE_SPECIALREG(mdscr_el1, DBG_MDSCR_TDCC);
> > Index: arch/arm64/include/armreg.h
> > ===
> > RCS file: /cvs/src/sys/arch/arm64/include/armreg.h,v
> > retrieving revision 1.11
> > diff -u -p -r1.11 armreg.h
> > --- arch/arm64/include/armreg.h 5 Jun 2020 22:14:25 -   1.11
> > +++ arch/arm64/include/armreg.h 13 Aug 2020 19:12:30 -
> > @@ -451,6 +451,7 @@
> >  #defineSCTLR_nTWI  0x0001
> >  #defineSCTLR_nTWE  0x0004
> >  #defineSCTLR_WXN   0x0008
> > +#defineSCTLR_SPAN  0x0080
> >  #defineSCTLR_EOE   0x0100
> >  #defineSCTLR_EE0x0200
> >  #defineSCTLR_UCI   0x0400
> > @@ -478,6 +479,7 @@
> >  #definePSR_D   0x0200
> >  #definePSR_IL  0x0010
> >  #definePSR_SS  0x0020
> > +#definePSR_PAN 0x0040
> >  #definePSR_V   0x1000
> >  #definePSR_C   0x2000
> >  #definePSR_Z   0x4000
> > 
> 



Enable arm64 PAN feature

2020-08-13 Thread Mark Kettenis
ARMv8.1 introduced PAN (Privileged Access Never) which prevents the
kernel from accessing userland data.  This can be bypassed by using
special instructions which we already use in copyin(9) and friends.
So we can simply turn this feature on if the CPU supports it.

Tested on an Odroid-C4 which has Cortex-A55 cores that have PAN
support.

ok?


Index: arch/arm64/arm64/cpu.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/cpu.c,v
retrieving revision 1.38
diff -u -p -r1.38 cpu.c
--- arch/arm64/arm64/cpu.c  4 Jun 2020 21:18:16 -   1.38
+++ arch/arm64/arm64/cpu.c  13 Aug 2020 19:12:30 -
@@ -321,6 +321,7 @@ cpu_attach(struct device *parent, struct
struct fdt_attach_args *faa = aux;
struct cpu_info *ci;
uint64_t mpidr = READ_SPECIALREG(mpidr_el1);
+   uint64_t id_aa64mmfr1, sctlr;
uint32_t opp;
 
KASSERT(faa->fa_nreg > 0);
@@ -393,6 +394,14 @@ cpu_attach(struct device *parent, struct
cpu_cpuspeed = cpu_clockspeed;
}
 
+   /* Enable PAN. */
+   id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
+   if (ID_AA64MMFR1_PAN(id_aa64mmfr1) != ID_AA64MMFR1_PAN_NONE) {
+   sctlr = READ_SPECIALREG(sctlr_el1);
+   sctlr &= ~SCTLR_SPAN;
+   WRITE_SPECIALREG(sctlr_el1, sctlr);
+   }
+
/* Initialize debug registers. */
WRITE_SPECIALREG(mdscr_el1, DBG_MDSCR_TDCC);
WRITE_SPECIALREG(oslar_el1, 0);
@@ -522,6 +531,7 @@ cpu_boot_secondary(struct cpu_info *ci)
 void
 cpu_start_secondary(struct cpu_info *ci)
 {
+   uint64_t id_aa64mmfr1, sctlr;
uint64_t tcr;
int s;
 
@@ -543,6 +553,14 @@ cpu_start_secondary(struct cpu_info *ci)
tcr |= TCR_T0SZ(64 - USER_SPACE_BITS);
tcr |= TCR_A1;
WRITE_SPECIALREG(tcr_el1, tcr);
+
+   /* Enable PAN. */
+   id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
+   if (ID_AA64MMFR1_PAN(id_aa64mmfr1) != ID_AA64MMFR1_PAN_NONE) {
+   sctlr = READ_SPECIALREG(sctlr_el1);
+   sctlr &= ~SCTLR_SPAN;
+   WRITE_SPECIALREG(sctlr_el1, sctlr);
+   }
 
/* Initialize debug registers. */
WRITE_SPECIALREG(mdscr_el1, DBG_MDSCR_TDCC);
Index: arch/arm64/include/armreg.h
===
RCS file: /cvs/src/sys/arch/arm64/include/armreg.h,v
retrieving revision 1.11
diff -u -p -r1.11 armreg.h
--- arch/arm64/include/armreg.h 5 Jun 2020 22:14:25 -   1.11
+++ arch/arm64/include/armreg.h 13 Aug 2020 19:12:30 -
@@ -451,6 +451,7 @@
 #defineSCTLR_nTWI  0x0001
 #defineSCTLR_nTWE  0x0004
 #defineSCTLR_WXN   0x0008
+#defineSCTLR_SPAN  0x0080
 #defineSCTLR_EOE   0x0100
 #defineSCTLR_EE0x0200
 #defineSCTLR_UCI   0x0400
@@ -478,6 +479,7 @@
 #definePSR_D   0x0200
 #definePSR_IL  0x0010
 #definePSR_SS  0x0020
+#definePSR_PAN 0x0040
 #definePSR_V   0x1000
 #definePSR_C   0x2000
 #definePSR_Z   0x4000



sdmmc(4) HS200 support

2020-08-12 Thread Mark Kettenis
This diff lays the groundwork for HS200 mode for eMMC.  This mode
supports data transfer modes of up to 200 MB/s.  The crucial bit here
is that using this mode requires tuning which is done by calling a
chip-specific execute_tuning function.  I have an implementation for
amlmmc(4) that seems to work which I'll send in a separate diff.

The diff also adds some bits to support higher speed modes for SD
cards, but those aren't hooked up yet.  I'll leave that for a future
diff.  I'll probably look into adding support for sdhc(4) in the near
future as well.

Don't expect enormous speed increases from this; most eMMC chips I've
seen are not actually limited by the transfer speed.  But for
amlmmc(4) this makes a difference since the DDR52 mode isn't reliable.

Tested on the Odroid-N2, Odroid-C4 and an amd64 netbook that uses eMMC.

ok?


Index: dev/sdmmc/sdhc.c
===
RCS file: /cvs/src/sys/dev/sdmmc/sdhc.c,v
retrieving revision 1.68
diff -u -p -r1.68 sdhc.c
--- dev/sdmmc/sdhc.c14 Jun 2020 18:37:16 -  1.68
+++ dev/sdmmc/sdhc.c12 Aug 2020 18:52:05 -
@@ -121,26 +121,18 @@ void  sdhc_dump_regs(struct sdhc_host *);
 #endif
 
 struct sdmmc_chip_functions sdhc_functions = {
-   /* host controller reset */
-   sdhc_host_reset,
-   /* host controller capabilities */
-   sdhc_host_ocr,
-   sdhc_host_maxblklen,
-   /* card detection */
-   sdhc_card_detect,
-   /* bus power and clock frequency */
-   sdhc_bus_power,
-   sdhc_bus_clock,
-   sdhc_bus_width,
-   /* command execution */
-   sdhc_exec_command,
-   /* card interrupt */
-   sdhc_card_intr_mask,
-   sdhc_card_intr_ack,
-   /* UHS functions */
-   sdhc_signal_voltage,
-   /* hibernate */
-   sdhc_hibernate_init,
+   .host_reset = sdhc_host_reset,
+   .host_ocr = sdhc_host_ocr,
+   .host_maxblklen = sdhc_host_maxblklen,
+   .card_detect = sdhc_card_detect,
+   .bus_power = sdhc_bus_power,
+   .bus_clock = sdhc_bus_clock,
+   .bus_width = sdhc_bus_width,
+   .exec_command = sdhc_exec_command,
+   .card_intr_mask = sdhc_card_intr_mask,
+   .card_intr_ack = sdhc_card_intr_ack,
+   .signal_voltage = sdhc_signal_voltage,
+   .hibernate_init = sdhc_hibernate_init,
 };
 
 struct cfdriver sdhc_cd = {
Index: dev/sdmmc/sdmmc_mem.c
===
RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_mem.c,v
retrieving revision 1.33
diff -u -p -r1.33 sdmmc_mem.c
--- dev/sdmmc/sdmmc_mem.c   4 Jun 2018 13:33:10 -   1.33
+++ dev/sdmmc/sdmmc_mem.c   12 Aug 2020 18:52:05 -
@@ -70,6 +70,34 @@ int  sdmmc_mem_write_block_subr(struct sd
 #define DPRINTF(s) /**/
 #endif
 
+const struct {
+   const char *name;
+   int v;
+   int freq;
+} switch_group0_functions[] = {
+   /* Default/SDR12 */
+   { "Default/SDR12",   0,  25000 },
+
+   /* High-Speed/SDR25 */
+   { "High-Speed/SDR25",   SMC_CAPS_SD_HIGHSPEED,   50000 },
+
+   /* SDR50 */
+   { "SDR50",  SMC_CAPS_UHS_SDR50, 100000 },
+
+   /* SDR104 */
+   { "SDR104", SMC_CAPS_UHS_SDR104,208000 },
+
+   /* DDR50 */
+   { "DDR50",  SMC_CAPS_UHS_DDR50,  50000 },
+};
+
+const int sdmmc_mmc_timings[] = {
+   [SDMMC_TIMING_LEGACY]   = 26000,
+   [SDMMC_TIMING_HIGHSPEED]= 52000,
+   [SDMMC_TIMING_MMC_DDR52]= 52000,
+   [SDMMC_TIMING_MMC_HS200]= 200000
+};
+
 /*
  * Initialize SD/MMC memory cards and memory in SDIO "combo" cards.
  */
@@ -581,6 +609,41 @@ sdmmc_be512_to_bitfield512(sdmmc_bitfiel
 }
 
 int
+sdmmc_mem_execute_tuning(struct sdmmc_softc *sc, struct sdmmc_function *sf)
+{
+   int timing = -1;
+
+   if (ISSET(sc->sc_flags, SMF_SD_MODE)) {
+   if (!ISSET(sc->sc_flags, SMF_UHS_MODE))
+   return 0;
+
+   switch (sf->csd.tran_speed) {
+   case 100000:
+   timing = SDMMC_TIMING_UHS_SDR50;
+   break;
+   case 208000:
+   timing = SDMMC_TIMING_UHS_SDR104;
+   break;
+   default:
+   return 0;
+   }
+   } else {
+   switch (sf->csd.tran_speed) {
+   case 200000:
+   timing = SDMMC_TIMING_MMC_HS200;
+   break;
+   default:
+   return 0;
+   }
+   }
+
+   DPRINTF(("%s: execute tuning for timing %d\n", SDMMCDEVNAME(sc),
+   timing));
+
+   return sdmmc_chip_execute_tuning(sc->sct, sc->sch, timing);
+}
+
+int
 sdmmc_mem_sd_init(struct sdmmc_softc *sc, struct sdmmc_function *sf)
 {
int support_func, best_func, error;
@@ -647,6 +710,8 @@ sdmmc_mem_sd_init(struct sdmmc_softc *sc
  

Re: exar XR17V35x (again)

2020-08-11 Thread Mark Kettenis
> Date: Wed, 5 Aug 2020 16:39:06 -0500
> From: joshua stein 
> Content-Type: text/plain; charset=utf-8
> Content-Disposition: inline
> 
> In 2018 I added support for Exar XR17V35x serial ports:
> 
> The Exar XR17V354 has 4 com ports that have a 256-byte FIFO, use a
> frequency of 125Mhz, and have a unique sleep register.  A custom
> interrupt handler is setup in puc for these ports so it can check a
> register which reports which ports triggered the interrupt, rather
> than having to run comintr for every port every time.
> 
> https://github.com/openbsd/src/commit/21514470a28cb3682074159e69358d924e73f5f1
> 
> This was backed out shortly after:
> 
> Revert previous commit; the XR17V35X probe that was added accesses 
> registers
> that aren't guaranteed to be there and may even belong to a different 
> device.
> This triggers a fault on hppa machines like the C3000 for example.
> 
> https://github.com/openbsd/src/commit/9f2c39383ee470c5b76fb828f64ea58341b214e9
> https://github.com/openbsd/src/commit/b40f90ead5d17e757784d1aa91d0d0406ce61d20
> 
> I needed this again, so this version sets the sc_uarttype in 
> com_puc_attach ahead of time because the type of port is already 
> known (from pucdata.c).  This avoids any probe in com_attach_subr 
> which previously tried to upgrade from COM_UART_16550A to 
> COM_UART_XR17V35X.
> 
> puc0 at pci4 dev 0 function 0 "Exar XR17V354" rev 0x03: ports: 16 com
> com4 at puc0 port 0 apic 5 int 16: xr17v35x, 256 byte fifo
> com5 at puc0 port 1 apic 5 int 16: xr17v35x, 256 byte fifo
> com6 at puc0 port 2 apic 5 int 16: xr17v35x, 256 byte fifo
> com7 at puc0 port 3 apic 5 int 16: xr17v35x, 256 byte fifo

Yes, this is a better approach.  Two (smallish) issues below though.

> Index: sys/dev/ic/com.c
> ===
> RCS file: /cvs/src/sys/dev/ic/com.c,v
> retrieving revision 1.172
> diff -u -p -u -p -r1.172 com.c
> --- sys/dev/ic/com.c  9 Mar 2020 04:38:46 -   1.172
> +++ sys/dev/ic/com.c  5 Aug 2020 21:35:02 -
> @@ -306,6 +306,9 @@ comopen(dev_t dev, int flag, int mode, s
>   case COM_UART_TI16750:
>   com_write_reg(sc, com_ier, 0);
>   break;
> + case COM_UART_XR17V35X:
> + com_write_reg(sc, UART_EXAR_SLEEP, 0);
> + break;
>   }
>   }
>  
> @@ -498,6 +501,9 @@ compwroff(struct com_softc *sc)
>   case COM_UART_TI16750:
>   com_write_reg(sc, com_ier, IER_SLEEP);
>   break;
> + case COM_UART_XR17V35X:
> + com_write_reg(sc, UART_EXAR_SLEEP, 0xff);
> + break;
>   }
>   }
>  }
> @@ -533,6 +539,9 @@ com_resume(struct com_softc *sc)
>   case COM_UART_TI16750:
>   com_write_reg(sc, com_ier, 0);
>   break;
> + case COM_UART_XR17V35X:
> + com_write_reg(sc, UART_EXAR_SLEEP, 0);
> + break;
>   }
>   }
>  
> @@ -919,7 +928,7 @@ comstart(struct tty *tp)
>   }
>  
>   if (ISSET(sc->sc_hwflags, COM_HW_FIFO)) {
> - u_char buffer[128]; /* largest fifo */
> + u_char buffer[256]; /* largest fifo */
>   int i, n;
>  
>   n = q_to_b(>t_outq, buffer,
> @@ -1466,6 +1475,11 @@ com_attach_subr(struct com_softc *sc)
>   break;
>  #endif
>  #endif
> + case COM_UART_XR17V35X:
> + printf(": xr17v35x, 256 byte fifo\n");
> + SET(sc->sc_hwflags, COM_HW_FIFO);
> + sc->sc_fifolen = 256;
> + break;
>   default:
>   panic("comattach: bad fifo type");
>   }
> @@ -1473,7 +1487,8 @@ com_attach_subr(struct com_softc *sc)
>  #ifdef COM_CONSOLE
>   if (!ISSET(sc->sc_hwflags, COM_HW_CONSOLE))
>  #endif
> - com_fifo_probe(sc);
> + if (sc->sc_fifolen < 256)
> + com_fifo_probe(sc);
>  
>   if (sc->sc_fifolen == 0) {
>   CLR(sc->sc_hwflags, COM_HW_FIFO);
> Index: sys/dev/ic/comreg.h
> ===
> RCS file: /cvs/src/sys/dev/ic/comreg.h,v
> retrieving revision 1.19
> diff -u -p -u -p -r1.19 comreg.h
> --- sys/dev/ic/comreg.h   2 May 2018 13:20:12 -   1.19
> +++ sys/dev/ic/comreg.h   5 Aug 2020 21:35:02 -
> @@ -182,6 +182,11 @@
>  
>  #define  COM_NPORTS  8
>  
> +/* Exar XR17V35X */
> +#define UART_EXAR_INT0   0x80
> +#define UART_EXAR_SLEEP  0x8b/* Sleep mode */
> +#define UART_EXAR_DVID   0x8d/* Device identification */
> +
>  /*
>   * WARNING: Serial console is assumed to be at COM1 address
>   */
> Index: sys/dev/ic/comvar.h
> ===
> 

Re: process: annotate locking for setitimer(2) state

2020-08-09 Thread Mark Kettenis
> Date: Sun, 9 Aug 2020 10:02:38 -0500
> From: Scott Cheloha 
> 
> On Sun, Aug 09, 2020 at 04:43:24PM +0200, Mark Kettenis wrote:
> > > Date: Sat, 8 Aug 2020 19:46:14 -0500
> > > From: Scott Cheloha 
> > > 
> > > Hi,
> > > 
> > > I want to annotate the locking for the per-process interval timers.
> > > 
> > > In the process struct, the ITIMER_REAL itimerspec and the ps_itimer_to
> > > timeout are protected by the kernel lock.  These should be annotated
> > > with "K", right?
> > > 
> > > Also in the process struct, the ITIMER_VIRTUAL and ITIMER_PROF
> > > itimerspecs are protected by the global itimer_mtx.
> > > 
> > > However, I don't think "itimer_mtx" is the best name for it, as it
> > > doesn't protect state for *all* per-process interval timers.  Just the
> > > virtual ones.
> > > 
> > > Could I rename the mutex to "virtual_itimer_mtx"?  Then I can annotate
> > > the state protected by it with "V", as shown here in this patch.
> > 
> > That's quite a long variable name though.  And it also protects
> > ITIMER_PROF.  So I'd say the name would be at least as misleading as
> > the current one and perhaps even more so.  You can just use "I" as the
> > annotation perhaps?
> 
> The convention is to use "I" for immutable variables.  We do it
> everywhere.  I don't think we should buck convention here.
> 
> I also proposed using "i" in a prior patch to annotate these
> variables, but mpi@ said it was too close to "I".  Also, it's a global
> lock, and we have settled on only annotate global locks with capital
> letters.
> 
> If you don't want to rename the mutex I guess we could use "T" for
> "timer".  We use "T" for other global locks (tc_lock, timeout_mutex)
> but not in this context.
> 
> However, there are only so many letters.  Eventually this scheme will
> run afoul of that limitation.  An idea I had re. the letter shortage
> was to use two letters where necessary.  So instead of "I" you could
> use "It" for "itimer".  We annotate locking hierarchies with commas so
> there isn't an ambiguity when reading it.
> 
> For example, if the code for writing a hypothetical "ps_foo" process
> struct member was:
> 
>   KERNEL_LOCK();
>   mtx_enter(&itimer_mtx);
>   ps.ps_foo = 10;
>   mtx_leave(&itimer_mtx);
>   KERNEL_UNLOCK();
> 
> You could annotate it like this:
> 
> /*
>  * Locks used to protect process struct members:
>  *
>  *It  itimer_mtx
>  *K   kernel lock
>  */
> struct process {
>   /* [...] */
>   int ps_foo; /* [K,It] per-process foobar */
>   /* [...] */
> };
> 
> anton@, mpi@: is that too radical or easily misread?
> 
> Sorry if this all seems fussy, but I'd like to get this right the
> first time.

'T' is fine with me.  But I'm clearly not an authority here.  Anyway,
renaming variables because you don't have a matching letter to
annotate the lock doesn't feel right.



Re: hardclock(9): fix race with setitimer(2)

2020-08-09 Thread Mark Kettenis
> Date: Sun, 9 Aug 2020 06:54:53 -0500
> From: Scott Cheloha 
> 
> Hi,
> 
> We update the ITIMER_VIRTUAL and ITIMER_PROF per-process interval
> timers from hardclock(9).  If a timer is enabled we call itimerdecr()
> to update and reload it as needed.  If a timer has expired we then set
> a flag on the current thread to signal itself when returning to
> userspace.
> 
> However, there is a race here with setitimer(2).  In hardclock(9) we
> check whether a timer is enabled *before* entering itimer_mtx in
> itimerdecr(), but once we have entered the mutex we don't double-check
> that the timer is still enabled.  This is wrong.  Another thread may
> have disabled the timer via setitimer(2) while we were entering the
> mutex.
> 
> This patch adds the second check to itimerdecr().  If we lost the race
> and the timer is disabled we return 1 to indicate that the timer has
> not expired, i.e. that the thread should take no action.
> 
> ok?

ok kettenis@

> Index: kern_time.c
> ===
> RCS file: /cvs/src/sys/kern/kern_time.c,v
> retrieving revision 1.134
> diff -u -p -r1.134 kern_time.c
> --- kern_time.c   8 Aug 2020 01:01:26 -   1.134
> +++ kern_time.c   9 Aug 2020 11:47:02 -
> @@ -682,6 +682,20 @@ itimerdecr(struct itimerspec *itp, long 
>   NSEC_TO_TIMESPEC(nsec, );
>  
>   mtx_enter(_mtx);
> +
> + /*
> +  * Double-check that the timer is enabled.  We may have lost
> +  * a race with another thread in setitimer(2) when entering
> +  * itimer_mtx.
> +  */
> + if (!timespecisset(>it_value)) {
> + mtx_leave(_mtx);
> + return (1);
> + }
> +
> + /*
> +  * The timer is enabled.  Update and reload it as needed.
> +  */
>   timespecsub(>it_value, , >it_value);
>   if (itp->it_value.tv_sec >= 0 && timespecisset(>it_value)) {
>   mtx_leave(_mtx);
> 
> 



Re: process: annotate locking for setitimer(2) state

2020-08-09 Thread Mark Kettenis
> Date: Sat, 8 Aug 2020 19:46:14 -0500
> From: Scott Cheloha 
> 
> Hi,
> 
> I want to annotate the locking for the per-process interval timers.
> 
> In the process struct, the ITIMER_REAL itimerspec and the ps_itimer_to
> timeout are protected by the kernel lock.  These should be annotated
> with "K", right?
> 
> Also in the process struct, the ITIMER_VIRTUAL and ITIMER_PROF
> itimerspecs are protected by the global itimer_mtx.
> 
> However, I don't think "itimer_mtx" is the best name for it, as it
> doesn't protect state for *all* per-process interval timers.  Just the
> virtual ones.
> 
> Could I rename the mutex to "virtual_itimer_mtx"?  Then I can annotate
> the state protected by it with "V", as shown here in this patch.

That's quite a long variable name though.  And it also protects
ITIMER_PROF.  So I'd say the name would be at least as misleading as
the current one and perhaps even more so.  You can just use "I" as the
annotation perhaps?

> Preferences?  ok?
> 
> Index: kern/kern_time.c
> ===
> RCS file: /cvs/src/sys/kern/kern_time.c,v
> retrieving revision 1.134
> diff -u -p -r1.134 kern_time.c
> --- kern/kern_time.c  8 Aug 2020 01:01:26 -   1.134
> +++ kern/kern_time.c  9 Aug 2020 00:41:10 -
> @@ -488,7 +488,13 @@ out:
>  }
>  
>  
> -struct mutex itimer_mtx = MUTEX_INITIALIZER(IPL_CLOCK);
> +/*
> + * Global virtual interval timer mutex.
> + *
> + * Protects state for the per-process ITIMER_VIRTUAL and ITIMER_PROF
> + * interval timers.
> + */
> +struct mutex virtual_itimer_mtx = MUTEX_INITIALIZER(IPL_CLOCK);
>  
>  /*
>   * Get value of an interval timer.  The process virtual and
> @@ -526,10 +532,10 @@ sys_getitimer(struct proc *p, void *v, r
>   return (EINVAL);
>   itimer = >p_p->ps_timer[which];
>   memset(, 0, sizeof(aitv));
> - mtx_enter(_mtx);
> + mtx_enter(_itimer_mtx);
>   TIMESPEC_TO_TIMEVAL(_interval, >it_interval);
>   TIMESPEC_TO_TIMEVAL(_value, >it_value);
> - mtx_leave(_mtx);
> + mtx_leave(_itimer_mtx);
>  
>   if (which == ITIMER_REAL) {
>   struct timeval now;
> @@ -604,9 +610,9 @@ sys_setitimer(struct proc *p, void *v, r
>   }
>   pr->ps_timer[ITIMER_REAL] = aits;
>   } else {
> - mtx_enter(_mtx);
> + mtx_enter(_itimer_mtx);
>   pr->ps_timer[which] = aits;
> - mtx_leave(_mtx);
> + mtx_leave(_itimer_mtx);
>   }
>  
>   return (0);
> @@ -681,20 +687,20 @@ itimerdecr(struct itimerspec *itp, long 
>  
>   NSEC_TO_TIMESPEC(nsec, );
>  
> - mtx_enter(_mtx);
> + mtx_enter(_itimer_mtx);
>   timespecsub(>it_value, , >it_value);
>   if (itp->it_value.tv_sec >= 0 && timespecisset(>it_value)) {
> - mtx_leave(_mtx);
> + mtx_leave(_itimer_mtx);
>   return (1);
>   }
>   if (!timespecisset(>it_interval)) {
>   timespecclear(>it_value);
> - mtx_leave(_mtx);
> + mtx_leave(_itimer_mtx);
>   return (0);
>   }
>   while (itp->it_value.tv_sec < 0 || !timespecisset(>it_value))
>   timespecadd(>it_value, >it_interval, >it_value);
> - mtx_leave(_mtx);
> + mtx_leave(_itimer_mtx);
>   return (0);
>  }
>  
> Index: sys/proc.h
> ===
> RCS file: /cvs/src/sys/sys/proc.h,v
> retrieving revision 1.297
> diff -u -p -r1.297 proc.h
> --- sys/proc.h6 Jul 2020 13:33:09 -   1.297
> +++ sys/proc.h9 Aug 2020 00:41:11 -
> @@ -150,9 +150,11 @@ struct unveil;
>  /*
>   * Locks used to protect struct members in this file:
>   *   a   atomic operations
> + *   K   kernel lock
>   *   m   this process' `ps_mtx'
>   *   p   this process' `ps_lock'
>   *   R   rlimit_lock
> + *   V   virtual_itimer_mtx
>   */
>  struct process {
>   /*
> @@ -216,7 +218,8 @@ struct process {
>   struct  rusage *ps_ru;  /* sum of stats for dead threads. */
>   struct  tusage ps_tu;   /* accumulated times. */
>   struct  rusage ps_cru;  /* sum of stats for reaped children */
> - struct  itimerspec ps_timer[3]; /* timers, indexed by ITIMER_* */
> + struct  itimerspec ps_timer[3]; /* [K] ITIMER_REAL timer */
> + /* [V] ITIMER_{PROF,VIRTUAL} timers */
>   struct  timeout ps_rucheck_to;  /* [] resource limit check timer */
>   time_t  ps_nextxcpu;/* when to send next SIGXCPU, */
>   /* in seconds of process runtime */
> @@ -269,7 +272,7 @@ struct process {
>   int ps_refcnt;  /* Number of references. */
>  
>   struct  timespec ps_start;  /* starting uptime. */
> - struct  timeout ps_realit_to;   /* real-time itimer trampoline. */
> + struct  timeout ps_realit_to;   /* [K] ITIMER_REAL 

Re: acpicpu: remove acpicpu_sc array

2020-08-05 Thread Mark Kettenis
> Date: Wed, 5 Aug 2020 17:10:37 +1000
> From: Jonathan Matthew 
> 
> This came out of the work on supporting ACPI0007 devices in acpicpu(4), but
> it's independent of that and I'd like to get it in the tree separately.
> 
> Since it was first added, acpicpu stores instances of itself in an array, 
> which it uses to find the acpicpu device for a cpu.  This runs into problems
> when there are more than MAXCPUS acpicpu devices.  Currently it overwrites
> whatever's after the array, leading to varying crashes and hangs depending
> on kernel link order.
> 
> More recently, we've added a pointer to struct cpu_info that does this more
> directly, and also has the advantage that it actually matches up the cpu ids
> rather than assuming cpu3 maps to acpicpu3.
> 
> This diff removes the acpicpu_sc array and uses the pointer from struct
> cpu_info instead.  Most of the accesses are just looking for the first 
> acpicpu,
> so we can use cpu_info_primary to find that.
> 
> I've tested this on a few different machines (including one with 128 acpicpu
> devices) and everything still works.
> 
> ok?

ok kettenis@

> Index: acpicpu.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpicpu.c,v
> retrieving revision 1.85
> diff -u -p -r1.85 acpicpu.c
> --- acpicpu.c 27 May 2020 05:02:21 -  1.85
> +++ acpicpu.c 3 Aug 2020 05:10:45 -
> @@ -188,8 +188,6 @@ struct cfdriver acpicpu_cd = {
>  
>  extern int setperf_prio;
>  
> -struct acpicpu_softc *acpicpu_sc[MAXCPUS];
> -
>  #if 0
>  void
>  acpicpu_set_throttle(struct acpicpu_softc *sc, int level)
> @@ -672,7 +670,6 @@ acpicpu_attach(struct device *parent, st
>  
>   sc->sc_acpi = (struct acpi_softc *)parent;
>   sc->sc_devnode = aa->aaa_node;
> - acpicpu_sc[sc->sc_dev.dv_unit] = sc;
>  
>   SLIST_INIT(>sc_cstates);
>  
> @@ -979,7 +976,7 @@ acpicpu_fetch_pss(struct acpicpu_pss **p
>* the bios ensures this...
>*/
>  
> - sc = acpicpu_sc[0];
> + sc = (struct acpicpu_softc *)cpu_info_primary.ci_acpicpudev;
>   if (!sc)
>   return 0;
>   *pss = sc->sc_pss;
> @@ -1024,7 +1021,7 @@ acpicpu_set_notify(void (*func)(struct a
>  {
>   struct acpicpu_softc*sc;
>  
> - sc = acpicpu_sc[0];
> + sc = (struct acpicpu_softc *)cpu_info_primary.ci_acpicpudev;
>   if (sc != NULL)
>   sc->sc_notify = func;
>  }
> @@ -1034,7 +1031,7 @@ acpicpu_setperf_ppc_change(struct acpicp
>  {
>   struct acpicpu_softc*sc;
>  
> - sc = acpicpu_sc[0];
> + sc = (struct acpicpu_softc *)cpu_info_primary.ci_acpicpudev;
>  
>   if (sc != NULL)
>   cpu_setperf(sc->sc_level);
> @@ -1048,7 +1045,7 @@ acpicpu_setperf(int level)
>   int idx, len;
>   uint32_tstatus = 0;
>  
> - sc = acpicpu_sc[cpu_number()];
> + sc = (struct acpicpu_softc *)curcpu()->ci_acpicpudev;
>  
>   dnprintf(10, "%s: acpicpu setperf level %d\n",
>   sc->sc_devnode->name, level);
> 
> 



Re: acpicpu(4) and ACPI0007

2020-08-01 Thread Mark Kettenis
> Date: Sat, 1 Aug 2020 18:23:08 +1000
> From: Jonathan Matthew 
> Cc: tech@openbsd.org
> Content-Type: text/plain; charset=us-ascii
> Content-Disposition: inline
> 
> On Wed, Jul 29, 2020 at 08:29:31PM +1000, Jonathan Matthew wrote:
> > On Wed, Jul 29, 2020 at 10:06:14AM +0200, Mark Kettenis wrote:
> > > > Date: Wed, 29 Jul 2020 10:38:55 +1000
> > > > From: Jonathan Matthew 
> > > > 
> > > > On Tue, Jul 28, 2020 at 07:30:36PM +0200, Mark Kettenis wrote:
> > > > > > Date: Tue, 28 Jul 2020 21:42:46 +1000
> > > > > > From: Jonathan Matthew 
> > > > > > 
> > > > > > On Tue, Jul 28, 2020 at 11:12:21AM +0200, Mark Kettenis wrote:
> > > > > > > > Date: Tue, 28 Jul 2020 13:46:34 +1000
> > > > > > > > From: Jonathan Matthew 
> > > > > > > > 
> > > > > > > > On Mon, Jul 27, 2020 at 05:16:47PM +0200, Mark Kettenis wrote:
> > > > > > > > > > Date: Mon, 27 Jul 2020 17:02:41 +0200 (CEST)
> > > > > > > > > > From: Mark Kettenis 
> > > > > > > > > > 
> > > > > > > > > > Recent ACPI versions have deprecated "Processor()" nodes in 
> > > > > > > > > > > favor of
> > > > > > > > > > "Device()" nodes with a _HID() method that returns 
> > > > > > > > > > "ACPI0007".  This
> > > > > > > > > > diff tries to support machines with firmware that 
> > > > > > > > > > implements this.  If
> > > > > > > > > > you see something like:
> > > > > > > > > > 
> > > > > > > > > >   "ACPI0007" at acpi0 not configured
> > > > > > > > > > 
> > > > > > > > > > please try the following diff and report back with an 
> > > > > > > > > > updated dmesg.
> > > > > > > > > > 
> > > > > > > > > > Cheers,
> > > > > > > > > > 
> > > > > > > > > > Mark
> > > > > > > > > 
> > > > > > > > > And now with the right diff...
> > > > > > > > 
> > > > > > > > On a dell r6415, it looks like this:
> > > > > > > > 
> > > > > > > > acpicpu0 at acpi0copyvalue: 6: C1(@1 halt!)
> > > > > > > > all the way up to
> > > > > > > > acpicpu127 at acpi0copyvalue: 6: no cpu matching ACPI ID 127
> > > > > > > > 
> > > > > > > > which I guess means aml_copyvalue() needs to learn how to copy 
> > > > > > > > AML_OBJTYPE_DEVICE.
> > > > > > > 
> > > > > > > Yes.  It is not immediately obvious how this should work.  Do we 
> > > > > > > need
> > > > > > > to copy the aml_node pointer or not?  We don't do that for
> > > > > > > AML_OBJTYPE_PROCESSOR and AML_OBJTYPE_POWERRSRC types which are
> > > > > > > > similar to AML_OBJTYPE_DEVICE.  But AML_OBJTYPE_DEVICE objects 
> > > > > > > don't
> > > > > > > carry any additional information.  So we end up with just an empty
> > > > > > > case to avoid the warning.
> > > > > > > 
> > > > > > > Does this work on the Dell machines?
> > > > > > 
> > > > > > We've seen crashes in pool_cache_get() in various places after all 
> > > > > > the acpicpus
> > > > > > attach, which we haven't seen before on these machines, so I think 
> > > > > > it's
> > > > > > corrupting memory somehow.
> > > > > 
> > > > > Does that happen with only the acpicpu(4) diff?
> > > > 
> > > > Yes.  Looking at this a bit more, in the case where aml_evalnode() can't
> > > > copy the result value, it leaves it uninitialised, which means we'll 
> > > > call
> > > > aml_freevalue() where res is stack junk.  memset(, 0, 
> > > > sizeof(res))
> > > > seems to fix it.
> > > 
> > > Eh, where exactly?
> > 
> > I had it just before the call to aml_evalnode(), but that can't be it,
> > since aml_evalno

Re: no output on glass console after switching to serial

2020-08-01 Thread Mark Kettenis
> Date: Sat, 1 Aug 2020 20:54:28 +0100
> From: Stuart Henderson 
> 
> I've just been building a machine with serial console to go to colo
> tomorrow and have noticed that there's no output on glass console
> after the "switching console to com0" message. The only getty running
> after boot is the one on serial console.
> 
> I won't be able to do much in the way of testing on it now but thought
> it would be worth flagging anyway. Anyone see similar on other machines?

See below...

> >> OpenBSD/amd64 BOOT 3.52
> boot>
> booting hd0a:/bsd: 14488904+3191824+344096+0+872448 
> [972760+128+1137936+860957]=0x14ddc88
> entry point at 0x81001000
> [ using 2972808 bytes of bsd ELF symbol table ]
> Copyright (c) 1982, 1986, 1989, 1991, 1993
> The Regents of the University of California.  All rights reserved.
> Copyright (c) 1995-2020 OpenBSD. All rights reserved.  https://www.OpenBSD.org
> 
> OpenBSD 6.7-current (GENERIC.MP) #380: Fri Jul 31 09:04:24 MDT 2020
> dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> real mem = 8464842752 (8072MB)
> avail mem = 8193216512 (7813MB)
> random: good seed from bootblocks
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 2.8 @ 0xed9b0 (46 entries)
> bios0: vendor American Megatrends Inc. version "1.3" date 03/19/2018
> bios0: Supermicro Super Server
> acpi0 at bios0: ACPI 5.0
> acpi0: sleep states S0 S4 S5
> acpi0: tables DSDT FACP APIC FPDT FIDT SPMI MCFG UEFI DBG2 HPET WDDT SSDT 
> SSDT SSDT PRAD DMAR HEST BERT ERST EINJ
> acpi0: wakeup devices IP2P(S4) EHC1(S4) EHC2(S4) RP07(S4) RP08(S4) BR1A(S4) 
> BR1B(S4) BR2A(S4) BR2B(S4) BR2C(S4) BR2D(S4) BR3A(S4) BR3B(S4) BR3C(S4) 
> BR3D(S4) RP01(S4) [...]
> acpitimer0 at acpi0: 3579545 Hz, 24 bits
> acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
> cpu0 at mainbus0: apid 0 (boot processor)
> cpu0: Intel(R) Xeon(R) CPU D-1518 @ 2.20GHz, 2200.27 MHz, 06-56-03
> cpu0: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,DCA,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,HLE,AVX2,SMEP,BMI2,ERMS,INVPCID,RTM,PQM,RDSEED,ADX,SMAP,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
> cpu0: 256KB 64b/line 8-way L2 cache
> cpu0: smt 0, core 0, package 0
> mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
> cpu0: apic clock running at 99MHz
> cpu0: mwait min=64, max=64, C-substates=0.2.1.2, IBE
> cpu1 at mainbus0: apid 2 (application processor)
> cpu1: Intel(R) Xeon(R) CPU D-1518 @ 2.20GHz, 2200.01 MHz, 06-56-03
> cpu1: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,DCA,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,HLE,AVX2,SMEP,BMI2,ERMS,INVPCID,RTM,PQM,RDSEED,ADX,SMAP,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
> cpu1: 256KB 64b/line 8-way L2 cache
> cpu1: smt 0, core 1, package 0
> cpu2 at mainbus0: apid 4 (application processor)
> cpu2: Intel(R) Xeon(R) CPU D-1518 @ 2.20GHz, 2200.01 MHz, 06-56-03
> cpu2: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,DCA,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,HLE,AVX2,SMEP,BMI2,ERMS,INVPCID,RTM,PQM,RDSEED,ADX,SMAP,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
> cpu2: 256KB 64b/line 8-way L2 cache
> cpu2: smt 0, core 2, package 0
> cpu3 at mainbus0: apid 6 (application processor)
> cpu3: Intel(R) Xeon(R) CPU D-1518 @ 2.20GHz, 2200.01 MHz, 06-56-03
> cpu3: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,DCA,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,HLE,AVX2,SMEP,BMI2,ERMS,INVPCID,RTM,PQM,RDSEED,ADX,SMAP,PT,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
> cpu3: 256KB 64b/line 8-way L2 cache
> cpu3: smt 0, core 3, package 0
> cpu4 at mainbus0: apid 1 (application processor)
> cpu4: Intel(R) Xeon(R) CPU D-1518 @ 2.20GHz, 2200.00 MHz, 06-56-03
> cpu4: 
> 

Re: kernel crash in setrunqueue

2020-07-29 Thread Mark Kettenis
> Date: Wed, 29 Jul 2020 13:03:43 -0700
> From: Mike Larkin 
> 
> Hi,
> 
>  I'm seeing crashes on amd64 GENERIC.MP on a few VMs recently. This happens
> on GENERIC.MP regardless of whether or not the VM has one cpu or more than
> one. It does not happen on GENERIC kernels.
> 
>  The crash will happen fairly quickly after the kernel starts executing
> processes. Sometimes it crashes instantly, sometimes it lasts for a minute
> or two. It rarely makes it to the login prompt. The problem is 100%
> reproducible on two different VMs I have, running on two different
> hypervisors (Hyper-V and ESXi6.7U2).
> 
>  I first started noticing the problem on the 24th July snap, but TBH these
> machines were not frequently updated, so the previous snap I had installed
> might have been a couple months old. Whatever older snap was on them before
> worked fine.
> 
>  Since this is happening on two different machines with two different VMs,
> I'm gonna rule out hardware issues.
> 
>  Crash:
> 
> kernel: protection fault trap, code=0
> Stopped at      setrunqueue+0xa2:       addl    $0x1,0x288(%r13)
> 
>  Trace:
> ddb{2}> trace
> setrunqueue(27b3d6c24c3fab80, 800015e874e0,32) at setrunqueue+0xa2
> sched_barrier_task(800015f1a168) at sched_barrier_task+0x6c
> taskq_thread(82121548) at taskq_thread+0x8d
> end trace frame: 0x0, count: -3
> 
>  Registers:
> ddb{2}> sh r
> rdi   0x821ee728  sched_lock
> rsi   0x800014cc6ff0
> rbp   0x800015ea0e40
> rbx0
> rdx 0x23ca94  acpi_pdirpa_0x2288fc
> rcx  0xc
> rax  0xc
> r8 0x202
> r9   0x2
> r100
> r11   0x57f79bf6968709d8
> r12   0x800015e874e0
> r13   0x27b3d6c24c3fab80
> r14 0x32
> r15   0x27b3d6c24c3fab80
> rip   0x81b9df22  setrunqueue+0xa2
> cs   0x8
> rflags   0x10207  __ALIGN_SIZE+0xf207
> rsp   0x800015ea0df0
> ss  0x10
> 
> 
> The offending instruction is in kern_sched.c:260:
> 
>   spc->spc_nrun++;
> 
> ... which indicates 'spc' is trash (and it is, based on %r13 above). In my
> tests, %r13 always is this same trash value. That comes from 'ci', which is
> either passed in or chosen by sched_choosecpu. Neither of these functions
> have changed recently, so I'm guessing this corruption is coming from 
> something
> else.
> 
>  Anyone have ideas where to start looking? I suppose I could start bisecting,
> but does anyone know of any changes that would affect this area?
> 
>  I can send dmesgs if needed, but these are pretty standard VMs,
> nothing fancy configured in them. 4 CPUs, 8GB RAM, etc.

They're VMs and it turns out that many of the "PV" drivers are/were
using the intr_barrier() interface the wrong way.

For Hyper-V, see my reply in the "Panic on boot with Hyper-V since Jun
17 snapshot" thread on bugs@ from earlier today.

Cheers,

Mark



Re: acpicpu(4) and ACPI0007

2020-07-29 Thread Mark Kettenis
> Date: Wed, 29 Jul 2020 10:38:55 +1000
> From: Jonathan Matthew 
> 
> On Tue, Jul 28, 2020 at 07:30:36PM +0200, Mark Kettenis wrote:
> > > Date: Tue, 28 Jul 2020 21:42:46 +1000
> > > From: Jonathan Matthew 
> > > 
> > > On Tue, Jul 28, 2020 at 11:12:21AM +0200, Mark Kettenis wrote:
> > > > > Date: Tue, 28 Jul 2020 13:46:34 +1000
> > > > > From: Jonathan Matthew 
> > > > > 
> > > > > On Mon, Jul 27, 2020 at 05:16:47PM +0200, Mark Kettenis wrote:
> > > > > > > Date: Mon, 27 Jul 2020 17:02:41 +0200 (CEST)
> > > > > > > From: Mark Kettenis 
> > > > > > > 
> > > > > > > Recent ACPI versions have deprecated "Processor()" nodes in 
> > > > > > > > favour of
> > > > > > > "Device()" nodes with a _HID() method that returns "ACPI0007".  
> > > > > > > This
> > > > > > > diff tries to support machines with firmware that implements 
> > > > > > > this.  If
> > > > > > > you see something like:
> > > > > > > 
> > > > > > >   "ACPI0007" at acpi0 not configured
> > > > > > > 
> > > > > > > please try the following diff and report back with an updated 
> > > > > > > dmesg.
> > > > > > > 
> > > > > > > Cheers,
> > > > > > > 
> > > > > > > Mark
> > > > > > 
> > > > > > And now with the right diff...
> > > > > 
> > > > > On a dell r6415, it looks like this:
> > > > > 
> > > > > acpicpu0 at acpi0copyvalue: 6: C1(@1 halt!)
> > > > > all the way up to
> > > > > acpicpu127 at acpi0copyvalue: 6: no cpu matching ACPI ID 127
> > > > > 
> > > > > which I guess means aml_copyvalue() needs to learn how to copy 
> > > > > AML_OBJTYPE_DEVICE.
> > > > 
> > > > Yes.  It is not immediately obvious how this should work.  Do we need
> > > > to copy the aml_node pointer or not?  We don't do that for
> > > > AML_OBJTYPE_PROCESSOR and AML_OBJTYPE_POWERRSRC types which are
> > > > > similar to AML_OBJTYPE_DEVICE.  But AML_OBJTYPE_DEVICE objects don't
> > > > carry any additional information.  So we end up with just an empty
> > > > case to avoid the warning.
> > > > 
> > > > Does this work on the Dell machines?
> > > 
> > > We've seen crashes in pool_cache_get() in various places after all the 
> > > acpicpus
> > > attach, which we haven't seen before on these machines, so I think it's
> > > corrupting memory somehow.
> > 
> > Does that happen with only the acpicpu(4) diff?
> 
> Yes.  Looking at this a bit more, in the case where aml_evalnode() can't
> copy the result value, it leaves it uninitialised, which means we'll call
> aml_freevalue() where res is stack junk.  memset(&res, 0, sizeof(res))
> seems to fix it.

Eh, where exactly?

> > > With this addition, we get this for each cpu:
> > > acpicpu0 at acpi0: C1(@1 halt!)
> > 
> > The exclamation mark indicates that this is the "fallback" C-state.
> > Is there a _CST method at all?
> > 
> > Anyway, given that this is a server system, it isn't really surprising
> > that there isn't any fancy power saving stuff.
> 
> Right, there doesn't seem to be any.  The processor devices look like this
> in the aml:
> 
> Scope (_SB)
> {
> Device (C000)
> {
> Name (_HID, "ACPI0007" /* Processor Device */)  // _HID: Hardware 
> ID
> Name (_UID, 0x00)  // _UID: Unique ID
> }
> 
> Device (C001)
> {
> Name (_HID, "ACPI0007" /* Processor Device */)  // _HID: Hardware 
> ID
> Name (_UID, 0x01)  // _UID: Unique ID
> }
> 
>  .. and so on.

Usually there is an SSDT that fills in the details.  The acpidump
output I have for the r6415 does have one, but it doesn't add
anything.

> > > > Index: dev/acpi/dsdt.c
> > > > ===
> > > > RCS file: /cvs/src/sys/dev/acpi/dsdt.c,v
> > > > retrieving revision 1.252
> > > > diff -u -p -r1.252 dsdt.c
> > > > --- dev/acpi/dsdt.c 21 Jul 2020 03:48:06 -  1.252
> > > > +++ dev/acpi/dsdt.c 28 Jul 2020 09:04:15 -
> > > > @@ -996,6 +996,8 @@ aml_copyvalue(struct aml_value *lhs, str
> > > > lhs->v_objref = rhs->v_objref;
> > > > aml_addref(lhs->v_objref.ref, "");
> > > > break;
> > > > +   case AML_OBJTYPE_DEVICE:
> > > > +   break;
> > > > default:
> > > > printf("copyvalue: %x", rhs->type);
> > > > break;
> > > 
> > > 
> 



Re: acpicpu(4) and ACPI0007

2020-07-28 Thread Mark Kettenis
> Date: Tue, 28 Jul 2020 21:42:46 +1000
> From: Jonathan Matthew 
> 
> On Tue, Jul 28, 2020 at 11:12:21AM +0200, Mark Kettenis wrote:
> > > Date: Tue, 28 Jul 2020 13:46:34 +1000
> > > From: Jonathan Matthew 
> > > 
> > > On Mon, Jul 27, 2020 at 05:16:47PM +0200, Mark Kettenis wrote:
> > > > > Date: Mon, 27 Jul 2020 17:02:41 +0200 (CEST)
> > > > > From: Mark Kettenis 
> > > > > 
> > > > > > Recent ACPI versions have deprecated "Processor()" nodes in favour of
> > > > > "Device()" nodes with a _HID() method that returns "ACPI0007".  This
> > > > > diff tries to support machines with firmware that implements this.  If
> > > > > you see something like:
> > > > > 
> > > > >   "ACPI0007" at acpi0 not configured
> > > > > 
> > > > > please try the following diff and report back with an updated dmesg.
> > > > > 
> > > > > Cheers,
> > > > > 
> > > > > Mark
> > > > 
> > > > And now with the right diff...
> > > 
> > > On a dell r6415, it looks like this:
> > > 
> > > acpicpu0 at acpi0copyvalue: 6: C1(@1 halt!)
> > > all the way up to
> > > acpicpu127 at acpi0copyvalue: 6: no cpu matching ACPI ID 127
> > > 
> > > which I guess means aml_copyvalue() needs to learn how to copy 
> > > AML_OBJTYPE_DEVICE.
> > 
> > Yes.  It is not immediately obvious how this should work.  Do we need
> > to copy the aml_node pointer or not?  We don't do that for
> > AML_OBJTYPE_PROCESSOR and AML_OBJTYPE_POWERRSRC types which are
> > similar to AML_OBJTYPE_DEVICE.  But AML_OBJTYPE_DEVICE objects don't
> > carry any additional information.  So we end up with just an empty
> > case to avoid the warning.
> > 
> > Does this work on the Dell machines?
> 
> We've seen crashes in pool_cache_get() in various places after all the 
> acpicpus
> attach, which we haven't seen before on these machines, so I think it's
> corrupting memory somehow.

Does that happen with only the acpicpu(4) diff?

> With this addition, we get this for each cpu:
> acpicpu0 at acpi0: C1(@1 halt!)

The exclamation mark indicates that this is the "fallback" C-state.
Is there a _CST method at all?

Anyway, given that this is a server system, it isn't really surprising
that there isn't any fancy power saving stuff.

> > Index: dev/acpi/dsdt.c
> > ===
> > RCS file: /cvs/src/sys/dev/acpi/dsdt.c,v
> > retrieving revision 1.252
> > diff -u -p -r1.252 dsdt.c
> > --- dev/acpi/dsdt.c 21 Jul 2020 03:48:06 -  1.252
> > +++ dev/acpi/dsdt.c 28 Jul 2020 09:04:15 -
> > @@ -996,6 +996,8 @@ aml_copyvalue(struct aml_value *lhs, str
> > lhs->v_objref = rhs->v_objref;
> > aml_addref(lhs->v_objref.ref, "");
> > break;
> > +   case AML_OBJTYPE_DEVICE:
> > +   break;
> > default:
> > printf("copyvalue: %x", rhs->type);
> > break;
> 
> 



Re: acpicpu(4) and ACPI0007

2020-07-28 Thread Mark Kettenis
> Date: Tue, 28 Jul 2020 11:16:56 +0100
> From: Jason McIntyre 
> 
> On Tue, Jul 28, 2020 at 11:12:21AM +0200, Mark Kettenis wrote:
> > > Date: Tue, 28 Jul 2020 13:46:34 +1000
> > > From: Jonathan Matthew 
> > > 
> > > On Mon, Jul 27, 2020 at 05:16:47PM +0200, Mark Kettenis wrote:
> > > > > Date: Mon, 27 Jul 2020 17:02:41 +0200 (CEST)
> > > > > From: Mark Kettenis 
> > > > > 
> > > > > > Recent ACPI versions have deprecated "Processor()" nodes in favour of
> > > > > "Device()" nodes with a _HID() method that returns "ACPI0007".  This
> > > > > diff tries to support machines with firmware that implements this.  If
> > > > > you see something like:
> > > > > 
> > > > >   "ACPI0007" at acpi0 not configured
> > > > > 
> > > > > please try the following diff and report back with an updated dmesg.
> > > > > 
> > > > > Cheers,
> > > > > 
> > > > > Mark
> > > > 
> > > > And now with the right diff...
> > > 
> > > On a dell r6415, it looks like this:
> > > 
> > > acpicpu0 at acpi0copyvalue: 6: C1(@1 halt!)
> > > all the way up to
> > > acpicpu127 at acpi0copyvalue: 6: no cpu matching ACPI ID 127
> > > 
> > > which I guess means aml_copyvalue() needs to learn how to copy 
> > > AML_OBJTYPE_DEVICE.
> > 
> > Yes.  It is not immediately obvious how this should work.  Do we need
> > to copy the aml_node pointer or not?  We don't do that for
> > AML_OBJTYPE_PROCESSOR and AML_OBJTYPE_POWERRSRC types which are
> > similar to AML_OBJTYPE_DEVICE.  But AML_OBJTYPE_DEVICE objects don't
> > carry any additional information.  So we end up with just an empty
> > case to avoid the warning.
> > 
> > Does this work on the Dell machines?
> > 
> > 
> > Index: dev/acpi/dsdt.c
> > ===
> > RCS file: /cvs/src/sys/dev/acpi/dsdt.c,v
> > retrieving revision 1.252
> > diff -u -p -r1.252 dsdt.c
> > --- dev/acpi/dsdt.c 21 Jul 2020 03:48:06 -  1.252
> > +++ dev/acpi/dsdt.c 28 Jul 2020 09:04:15 -
> > @@ -996,6 +996,8 @@ aml_copyvalue(struct aml_value *lhs, str
> > lhs->v_objref = rhs->v_objref;
> > aml_addref(lhs->v_objref.ref, "");
> > break;
> > +   case AML_OBJTYPE_DEVICE:
> > +   break;
> > default:
> > printf("copyvalue: %x", rhs->type);
> > break;
> > 
> 
> morning. it displays this here:
> 
>   acpicpu0 at acpi0: C3(0@350 io@0x415), C2(0@400 io@0x414), C1(0@1 
> mwait), PSS
>   acpicpu1 at acpi0: C3(0@350 io@0x415), C2(0@400 io@0x414), C1(0@1 
> mwait), PSS
>   acpicpu2 at acpi0: C3(0@350 io@0x415), C2(0@400 io@0x414), C1(0@1 
> mwait), PSS
>   acpicpu3 at acpi0: C3(0@350 io@0x415), C2(0@400 io@0x414), C1(0@1 
> mwait), PSS
>   acpicpu4 at acpi0: C3(0@350 io@0x415), C2(0@400 io@0x414), C1(0@1 
> mwait), PSS
>   acpicpu5 at acpi0: C3(0@350 io@0x415), C2(0@400 io@0x414), C1(0@1 
> mwait), PSS
>   acpicpu6 at acpi0: C3(0@350 io@0x415), C2(0@400 io@0x414), C1(0@1 
> mwait), PSS
>   acpicpu7 at acpi0: C3(0@350 io@0x415), C2(0@400 io@0x414), C1(0@1 
> mwait), PSS
>   acpicpu8 at acpi0: no cpu matching ACPI ID 8
>   acpicpu9 at acpi0: no cpu matching ACPI ID 9
>   acpicpu10 at acpi0: no cpu matching ACPI ID 10
>   acpicpu11 at acpi0: no cpu matching ACPI ID 11
>   acpicpu12 at acpi0: no cpu matching ACPI ID 12
>   acpicpu13 at acpi0: no cpu matching ACPI ID 13
>   acpicpu14 at acpi0: no cpu matching ACPI ID 14
>   acpicpu15 at acpi0: no cpu matching ACPI ID 15

Excellent!

We may want to do something about those "no cpu matching ACPI ID XX"
messages at some point.  But that's a diff for another day.

So ok's for both diffs are welcome.

Cheers,

Mark

P.S. I've also established that the EC-related messages are
indeed harmless and a result of sloppy BIOS writers.

> OpenBSD 6.7-current (GENERIC.MP) #3: Tue Jul 28 10:59:50 BST 2020
> jmc@kansas:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> real mem = 7895654400 (7529MB)
> avail mem = 7641284608 (7287MB)
> random: good seed from bootblocks
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 3.2 @ 0xca707000 (77 entries)
> bios0: vendor Dell Inc. version "1.1.0" date 05/27/2020
> bios0: Dell Inc. 

Re: acpicpu(4) and ACPI0007

2020-07-28 Thread Mark Kettenis
> Date: Tue, 28 Jul 2020 13:46:34 +1000
> From: Jonathan Matthew 
> 
> On Mon, Jul 27, 2020 at 05:16:47PM +0200, Mark Kettenis wrote:
> > > Date: Mon, 27 Jul 2020 17:02:41 +0200 (CEST)
> > > From: Mark Kettenis 
> > > 
> > > > Recent ACPI versions have deprecated "Processor()" nodes in favour of
> > > "Device()" nodes with a _HID() method that returns "ACPI0007".  This
> > > diff tries to support machines with firmware that implements this.  If
> > > you see something like:
> > > 
> > >   "ACPI0007" at acpi0 not configured
> > > 
> > > please try the following diff and report back with an updated dmesg.
> > > 
> > > Cheers,
> > > 
> > > Mark
> > 
> > And now with the right diff...
> 
> On a dell r6415, it looks like this:
> 
> acpicpu0 at acpi0copyvalue: 6: C1(@1 halt!)
> all the way up to
> acpicpu127 at acpi0copyvalue: 6: no cpu matching ACPI ID 127
> 
> which I guess means aml_copyvalue() needs to learn how to copy 
> AML_OBJTYPE_DEVICE.

Yes.  It is not immediately obvious how this should work.  Do we need
to copy the aml_node pointer or not?  We don't do that for
AML_OBJTYPE_PROCESSOR and AML_OBJTYPE_POWERRSRC types which are
similar to AML_OBJTYPE_DEVICE.  But AML_OBJTYPE_DEVICE objects don't
carry any additional information.  So we end up with just an empty
case to avoid the warning.

Does this work on the Dell machines?


Index: dev/acpi/dsdt.c
===
RCS file: /cvs/src/sys/dev/acpi/dsdt.c,v
retrieving revision 1.252
diff -u -p -r1.252 dsdt.c
--- dev/acpi/dsdt.c 21 Jul 2020 03:48:06 -  1.252
+++ dev/acpi/dsdt.c 28 Jul 2020 09:04:15 -
@@ -996,6 +996,8 @@ aml_copyvalue(struct aml_value *lhs, str
lhs->v_objref = rhs->v_objref;
aml_addref(lhs->v_objref.ref, "");
break;
+   case AML_OBJTYPE_DEVICE:
+   break;
default:
printf("copyvalue: %x", rhs->type);
break;



Re: timekeep: fixing large skews on amd64 with RDTSCP

2020-07-27 Thread Mark Kettenis
> Date: Mon, 27 Jul 2020 17:14:21 +0200
> From: Christian Weisgerber 
> 
> Scott Cheloha:
> 
> > --- lib/libc/arch/amd64/gen/usertc.c8 Jul 2020 09:17:48 -   
> > 1.2
> > +++ lib/libc/arch/amd64/gen/usertc.c25 Jul 2020 17:50:38 -
> > @@ -21,9 +21,12 @@
> >  static inline u_int
> >  rdtsc(void)
> >  {
> > -   uint32_t hi, lo;
> > -   asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
> > -   return ((uint64_t)lo)|(((uint64_t)hi)<<32);
> > +   uint32_t lo;
> > +
> > +   asm volatile("lfence");
> > +   asm volatile("rdtsc" : "=a"(lo) : : "rdx");
> 
> Is there a guarantee that two separate asm()s will not be reordered?

I believe that is true for "volatile" asm statements.  But this is all
not very well documented and I believe that the compiler may hoist
bits of C code in between, which is probably not what you want.

Note that since "asm" is non-standard C, we favour spelling it as
"__asm" since that makes the compiler shut up about it even if you
request stricter C standard compliance.

And given the kernel bit below...

> > +
> > +   return lo;
> >  }
> >  
> >  static int
> > --- sys/arch/amd64/amd64/tsc.c  6 Jul 2020 13:33:06 -   1.19
> > +++ sys/arch/amd64/amd64/tsc.c  25 Jul 2020 17:50:38 -
> > @@ -211,7 +211,12 @@ cpu_recalibrate_tsc(struct timecounter *
> >  u_int
> >  tsc_get_timecount(struct timecounter *tc)
> >  {
> > -   return rdtsc() + curcpu()->ci_tsc_skew;
> > +   uint32_t lo;
> > +
> > +   asm volatile("lfence");
> > +   asm volatile("rdtsc" : "=a"(lo) : : "rdx");
> > +
> > +   return lo + curcpu()->ci_tsc_skew;
> >  }
> >  
> >  void
> > 
> 
> I'd just do s/rdtsc/rdtsc_lfence/, which would agree well with the
> rest of the file.

Agreed.  And I would really prefer that the libc code stays as close
to the kernel code as possible.



Re: acpicpu(4) and ACPI0007

2020-07-27 Thread Mark Kettenis
> Date: Mon, 27 Jul 2020 11:10:42 -0400
> From: Bryan Steele 
> 
> On Mon, Jul 27, 2020 at 05:02:41PM +0200, Mark Kettenis wrote:
> > Recent ACPI versions have deprecated "Processor()" nodes in favour of
> > "Device()" nodes with a _HID() method that returns "ACPI0007".  This
> > diff tries to support machines with firmware that implements this.  If
> > you see something like:
> > 
> >   "ACPI0007" at acpi0 not configured
> > 
> > please try the following diff and report back with an updated dmesg.
> > 
> > Cheers,
> > 
> > Mark
> > 
> 
> Wrong diff?

Yes, too many diffs that start with acpi...

Thanks,

Mark

> > Index: dev/acpi/acpi.c
> > ===
> > RCS file: /cvs/src/sys/dev/acpi/acpi.c,v
> > retrieving revision 1.384
> > diff -u -p -r1.384 acpi.c
> > --- dev/acpi/acpi.c 11 May 2020 17:57:17 -  1.384
> > +++ dev/acpi/acpi.c 13 May 2020 18:44:32 -
> > @@ -72,6 +72,7 @@ int   acpi_debug = 16;
> >  
> >  intacpi_poll_enabled;
> >  intacpi_hasprocfvs;
> > +intacpi_haspci;
> >  
> >  #define ACPIEN_RETRIES 15
> >  
> > Index: dev/acpi/acpivar.h
> > ===
> > RCS file: /cvs/src/sys/dev/acpi/acpivar.h,v
> > retrieving revision 1.108
> > diff -u -p -r1.108 acpivar.h
> > --- dev/acpi/acpivar.h  8 May 2020 11:18:01 -   1.108
> > +++ dev/acpi/acpivar.h  13 May 2020 18:44:32 -
> > @@ -43,6 +43,7 @@ extern int acpi_debug;
> >  #endif
> >  
> >  extern int acpi_hasprocfvs;
> > +extern int acpi_haspci;
> >  
> >  struct klist;
> >  struct acpiec_softc;
> > Index: arch/amd64/amd64/mainbus.c
> > ===
> > RCS file: /cvs/src/sys/arch/amd64/amd64/mainbus.c,v
> > retrieving revision 1.49
> > diff -u -p -r1.49 mainbus.c
> > --- arch/amd64/amd64/mainbus.c  7 Sep 2019 13:46:19 -   1.49
> > +++ arch/amd64/amd64/mainbus.c  13 May 2020 18:44:32 -
> > @@ -231,6 +231,13 @@ mainbus_attach(struct device *parent, st
> >  #endif
> >  
> >  #if NPCI > 0
> > +#if NACPI > 0
> > +   if (acpi_haspci) {
> > +   extern void acpipci_attach_busses(struct device *);
> > +
> > +   acpipci_attach_busses(self);
> > +   } else
> > +#endif
> > {
> > pci_init_extents();
> >  
> > @@ -245,9 +252,6 @@ mainbus_attach(struct device *parent, st
> > mba.mba_pba.pba_domain = pci_ndomains++;
> > mba.mba_pba.pba_bus = 0;
> > config_found(self, _pba, mainbus_print);
> > -#if NACPI > 0
> > -   acpi_pciroots_attach(self, _pba, mainbus_print);
> > -#endif
> > }
> >  #endif
> >  
> > Index: arch/amd64/conf/RAMDISK
> > ===
> > RCS file: /cvs/src/sys/arch/amd64/conf/RAMDISK,v
> > retrieving revision 1.77
> > diff -u -p -r1.77 RAMDISK
> > --- arch/amd64/conf/RAMDISK 5 Mar 2020 16:36:30 -   1.77
> > +++ arch/amd64/conf/RAMDISK 13 May 2020 18:44:32 -
> > @@ -30,6 +30,7 @@ acpi0 at bios?
> >  #acpicpu*  at acpi?
> >  acpicmos*  at acpi?
> >  acpiec*at acpi?
> > +acpipci*   at acpi?
> >  acpiprt*   at acpi?
> >  acpimadt0  at acpi?
> >  #acpitz*   at acpi?
> > Index: arch/amd64/conf/RAMDISK_CD
> > ===
> > RCS file: /cvs/src/sys/arch/amd64/conf/RAMDISK_CD,v
> > retrieving revision 1.188
> > diff -u -p -r1.188 RAMDISK_CD
> > --- arch/amd64/conf/RAMDISK_CD  15 Feb 2020 08:49:11 -  1.188
> > +++ arch/amd64/conf/RAMDISK_CD  13 May 2020 18:44:32 -
> > @@ -37,6 +37,7 @@ acpi0 at bios?
> >  #acpicpu*  at acpi?
> >  acpicmos*  at acpi?
> >  acpiec*at acpi?
> > +acpipci*   at acpi?
> >  acpiprt*   at acpi?
> >  acpimadt0  at acpi?
> >  #acpitz*   at acpi?
> > Index: arch/amd64/pci/acpipci.c
> > ===
> > RCS file: /cvs/src/sys/arch/amd64/pci/acpipci.c,v
> > retrieving revision 1.3
> > diff -u -p -r1.3 acpipci.c
> > --- arch/amd64/pci/acpipci.c7 Sep 2019 13:46:19 -   1.3
> > +++ arch/amd64/pci/acpipci.c13 May 2020 18:4

Re: acpicpu(4) and ACPI0007

2020-07-27 Thread Mark Kettenis
> Date: Mon, 27 Jul 2020 17:02:41 +0200 (CEST)
> From: Mark Kettenis 
> 
> Recent ACPI versions have deprecated "Processor()" nodes in favour of
> "Device()" nodes with a _HID() method that returns "ACPI0007".  This
> diff tries to support machines with firmware that implements this.  If
> you see something like:
> 
>   "ACPI0007" at acpi0 not configured
> 
> please try the following diff and report back with an updated dmesg.
> 
> Cheers,
> 
> Mark

And now with the right diff...


Index: dev/acpi/acpicpu.c
===
RCS file: /cvs/src/sys/dev/acpi/acpicpu.c,v
retrieving revision 1.85
diff -u -p -r1.85 acpicpu.c
--- dev/acpi/acpicpu.c  27 May 2020 05:02:21 -  1.85
+++ dev/acpi/acpicpu.c  27 Jul 2020 14:58:38 -
@@ -186,6 +186,11 @@ struct cfdriver acpicpu_cd = {
NULL, "acpicpu", DV_DULL
 };
 
+const char *acpicpu_hids[] = {
+   "ACPI0007",
+   NULL
+};
+
 extern int setperf_prio;
 
 struct acpicpu_softc *acpicpu_sc[MAXCPUS];
@@ -650,6 +655,9 @@ acpicpu_match(struct device *parent, voi
struct acpi_attach_args *aa = aux;
struct cfdata   *cf = match;
 
+   if (acpi_matchhids(aa, acpicpu_hids, cf->cf_driver->cd_name))
+   return (1);
+
/* sanity */
if (aa->aaa_name == NULL ||
strcmp(aa->aaa_name, cf->cf_driver->cd_name) != 0 ||
@@ -665,6 +673,7 @@ acpicpu_attach(struct device *parent, st
struct acpicpu_softc*sc = (struct acpicpu_softc *)self;
struct acpi_attach_args *aa = aux;
struct aml_valueres;
+   int64_t uid;
int i;
uint32_tstatus = 0;
CPU_INFO_ITERATOR   cii;
@@ -675,6 +684,10 @@ acpicpu_attach(struct device *parent, st
acpicpu_sc[sc->sc_dev.dv_unit] = sc;
 
SLIST_INIT(&sc->sc_cstates);
+
+   if (aml_evalinteger(sc->sc_acpi, sc->sc_devnode,
+   "_UID", 0, NULL, &uid) == 0)
+   sc->sc_cpu = uid;
 
if (aml_evalnode(sc->sc_acpi, sc->sc_devnode, 0, NULL, &res) == 0) {
if (res.type == AML_OBJTYPE_PROCESSOR) {



acpicpu(4) and ACPI0007

2020-07-27 Thread Mark Kettenis
Recent ACPI versions have deprecated "Processor()" nodes in favour of
"Device()" nodes with a _HID() method that returns "ACPI0007".  This
diff tries to support machines with firmware that implements this.  If
you see something like:

  "ACPI0007" at acpi0 not configured

please try the following diff and report back with an updated dmesg.

Cheers,

Mark



Index: dev/acpi/acpi.c
===
RCS file: /cvs/src/sys/dev/acpi/acpi.c,v
retrieving revision 1.384
diff -u -p -r1.384 acpi.c
--- dev/acpi/acpi.c 11 May 2020 17:57:17 -  1.384
+++ dev/acpi/acpi.c 13 May 2020 18:44:32 -
@@ -72,6 +72,7 @@ int   acpi_debug = 16;
 
 intacpi_poll_enabled;
 intacpi_hasprocfvs;
+intacpi_haspci;
 
 #define ACPIEN_RETRIES 15
 
Index: dev/acpi/acpivar.h
===
RCS file: /cvs/src/sys/dev/acpi/acpivar.h,v
retrieving revision 1.108
diff -u -p -r1.108 acpivar.h
--- dev/acpi/acpivar.h  8 May 2020 11:18:01 -   1.108
+++ dev/acpi/acpivar.h  13 May 2020 18:44:32 -
@@ -43,6 +43,7 @@ extern int acpi_debug;
 #endif
 
 extern int acpi_hasprocfvs;
+extern int acpi_haspci;
 
 struct klist;
 struct acpiec_softc;
Index: arch/amd64/amd64/mainbus.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/mainbus.c,v
retrieving revision 1.49
diff -u -p -r1.49 mainbus.c
--- arch/amd64/amd64/mainbus.c  7 Sep 2019 13:46:19 -   1.49
+++ arch/amd64/amd64/mainbus.c  13 May 2020 18:44:32 -
@@ -231,6 +231,13 @@ mainbus_attach(struct device *parent, st
 #endif
 
 #if NPCI > 0
+#if NACPI > 0
+   if (acpi_haspci) {
+   extern void acpipci_attach_busses(struct device *);
+
+   acpipci_attach_busses(self);
+   } else
+#endif
{
pci_init_extents();
 
@@ -245,9 +252,6 @@ mainbus_attach(struct device *parent, st
mba.mba_pba.pba_domain = pci_ndomains++;
mba.mba_pba.pba_bus = 0;
config_found(self, &mba.mba_pba, mainbus_print);
-#if NACPI > 0
-   acpi_pciroots_attach(self, &mba.mba_pba, mainbus_print);
-#endif
}
 #endif
 
Index: arch/amd64/conf/RAMDISK
===
RCS file: /cvs/src/sys/arch/amd64/conf/RAMDISK,v
retrieving revision 1.77
diff -u -p -r1.77 RAMDISK
--- arch/amd64/conf/RAMDISK 5 Mar 2020 16:36:30 -   1.77
+++ arch/amd64/conf/RAMDISK 13 May 2020 18:44:32 -
@@ -30,6 +30,7 @@ acpi0 at bios?
 #acpicpu*  at acpi?
 acpicmos*  at acpi?
 acpiec*at acpi?
+acpipci*   at acpi?
 acpiprt*   at acpi?
 acpimadt0  at acpi?
 #acpitz*   at acpi?
Index: arch/amd64/conf/RAMDISK_CD
===
RCS file: /cvs/src/sys/arch/amd64/conf/RAMDISK_CD,v
retrieving revision 1.188
diff -u -p -r1.188 RAMDISK_CD
--- arch/amd64/conf/RAMDISK_CD  15 Feb 2020 08:49:11 -  1.188
+++ arch/amd64/conf/RAMDISK_CD  13 May 2020 18:44:32 -
@@ -37,6 +37,7 @@ acpi0 at bios?
 #acpicpu*  at acpi?
 acpicmos*  at acpi?
 acpiec*at acpi?
+acpipci*   at acpi?
 acpiprt*   at acpi?
 acpimadt0  at acpi?
 #acpitz*   at acpi?
Index: arch/amd64/pci/acpipci.c
===
RCS file: /cvs/src/sys/arch/amd64/pci/acpipci.c,v
retrieving revision 1.3
diff -u -p -r1.3 acpipci.c
--- arch/amd64/pci/acpipci.c7 Sep 2019 13:46:19 -   1.3
+++ arch/amd64/pci/acpipci.c13 May 2020 18:44:32 -
@@ -53,6 +53,19 @@ struct acpipci_softc {
struct device   sc_dev;
struct acpi_softc *sc_acpi;
struct aml_node *sc_node;
+
+   bus_space_tag_t sc_iot;
+   bus_space_tag_t sc_memt;
+   bus_dma_tag_t   sc_dmat;
+
+   struct extent   *sc_busex;
+   struct extent   *sc_memex;
+   struct extent   *sc_ioex;
+   charsc_busex_name[32];
+   charsc_ioex_name[32];
+   charsc_memex_name[32];
+   int sc_bus;
+   uint32_tsc_seg;
 };
 
 intacpipci_match(struct device *, void *, void *);
@@ -72,6 +85,11 @@ const char *acpipci_hids[] = {
NULL
 };
 
+void   acpipci_attach_deferred(struct device *);
+intacpipci_print(void *, const char *);
+intacpipci_parse_resources(int, union acpi_resource *, void *);
+void   acpipci_osc(struct acpipci_softc *);
+
 int
 acpipci_match(struct device *parent, void *match, void *aux)
 {
@@ -86,15 +104,225 @@ acpipci_attach(struct device *parent, st
 {
struct acpi_attach_args *aaa = aux;
struct acpipci_softc *sc = (struct acpipci_softc *)self;
-   struct aml_value args[4];
struct aml_value res;
-   static uint8_t uuid[16] = ACPI_PCI_UUID;
-   uint32_t buf[3];
+   uint64_t bbn = 0;
+   uint64_t seg = 0;
+
+   

bge(4) fix

2020-07-26 Thread Mark Kettenis
Booted up the old v210 to test something and noticed that it prints a
couple of:

  bge0: nvram lock timed out

warnings when booting up.  These are the on-board network interfaces
and we already established in the past that these come without
EEPROM/NVRAM and instead rely on the firmware to provide the MAC
address.

The diff below kills these messages.

ok?


Index: dev/pci/if_bge.c
===
RCS file: /cvs/src/sys/dev/pci/if_bge.c,v
retrieving revision 1.391
diff -u -p -r1.391 if_bge.c
--- dev/pci/if_bge.c10 Jul 2020 13:26:37 -  1.391
+++ dev/pci/if_bge.c26 Jul 2020 16:04:43 -
@@ -3235,7 +3235,8 @@ bge_reset(struct bge_softc *sc)
write_op = bge_writereg_ind;
 
if (BGE_ASICREV(sc->bge_chipid) != BGE_ASICREV_BCM5700 &&
-   BGE_ASICREV(sc->bge_chipid) != BGE_ASICREV_BCM5701) {
+   BGE_ASICREV(sc->bge_chipid) != BGE_ASICREV_BCM5701 &&
+   !(sc->bge_flags & BGE_NO_EEPROM)) {
CSR_WRITE_4(sc, BGE_NVRAM_SWARB, BGE_NVRAMSWARB_SET1);
for (i = 0; i < 8000; i++) {
if (CSR_READ_4(sc, BGE_NVRAM_SWARB) &



Re: change ktime to nanoseconds in drm

2020-07-21 Thread Mark Kettenis
> Date: Tue, 21 Jul 2020 19:33:21 +1000
> From: Jonathan Gray 
> 
> Change from using timevals for ktime to 64 bit count of nanoseconds
> to closer match linux.  From a discussion with cheloha@

ok kettenis@

> Index: sys/dev/pci/drm/drm_vblank.c
> ===
> RCS file: /cvs/src/sys/dev/pci/drm/drm_vblank.c,v
> retrieving revision 1.5
> diff -u -p -r1.5 drm_vblank.c
> --- sys/dev/pci/drm/drm_vblank.c  8 Jun 2020 04:47:58 -   1.5
> +++ sys/dev/pci/drm/drm_vblank.c  21 Jul 2020 07:00:48 -
> @@ -184,7 +184,7 @@ static void drm_reset_vblank_timestamp(s
>    * interrupt and assign 0 for now, to mark the vblanktimestamp as invalid.
>*/
>   if (!rc)
> - t_vblank = (struct timeval) {0, 0};
> + t_vblank = 0;
>  
>   /*
>* +1 to make sure user will never see the same
> @@ -293,7 +293,7 @@ static void drm_update_vblank_count(stru
>* for now, to mark the vblanktimestamp as invalid.
>*/
>   if (!rc && !in_vblank_irq)
> - t_vblank = (struct timeval) {0, 0};
> + t_vblank = 0;
>  
>   store_vblank(dev, pipe, diff, t_vblank, cur_vblank);
>  }
> @@ -871,7 +871,7 @@ static u64 drm_vblank_count_and_time(str
>   unsigned int seq;
>  
>   if (WARN_ON(pipe >= dev->num_crtcs)) {
> - *vblanktime = (struct timeval) {0, 0};
> + *vblanktime = 0;
>   return 0;
>   }
>  
> Index: sys/dev/pci/drm/include/linux/ktime.h
> ===
> RCS file: /cvs/src/sys/dev/pci/drm/include/linux/ktime.h,v
> retrieving revision 1.4
> diff -u -p -r1.4 ktime.h
> --- sys/dev/pci/drm/include/linux/ktime.h 7 Jul 2020 04:05:25 -0000   1.4
> +++ sys/dev/pci/drm/include/linux/ktime.h 21 Jul 2020 07:00:48 -
> @@ -22,42 +22,40 @@
>  #include 
>  #include 
>  
> -typedef struct timeval ktime_t;
> +typedef int64_t ktime_t;
>  
> -static inline struct timeval
> +static inline ktime_t
>  ktime_get(void)
>  {
> - struct timeval tv;
> - 
> - microuptime(&tv);
> - return tv;
> + struct timespec ts;
> + nanouptime(&ts);
> + return TIMESPEC_TO_NSEC(&ts);
>  }
>  
> -static inline struct timeval
> +static inline ktime_t
>  ktime_get_raw(void)
>  {
> - struct timeval tv;
> - 
> - microuptime(&tv);
> - return tv;
> + struct timespec ts;
> + nanouptime(&ts);
> + return TIMESPEC_TO_NSEC(&ts);
>  }
>  
>  static inline int64_t
> -ktime_to_ms(struct timeval tv)
> +ktime_to_ms(ktime_t k)
>  {
> - return timeval_to_ms(&tv);
> + return k / NSEC_PER_MSEC;
>  }
>  
>  static inline int64_t
> -ktime_to_us(struct timeval tv)
> +ktime_to_us(ktime_t k)
>  {
> - return timeval_to_us(&tv);
> + return k / NSEC_PER_USEC;
>  }
>  
>  static inline int64_t
> -ktime_to_ns(struct timeval tv)
> +ktime_to_ns(ktime_t k)
>  {
> - return timeval_to_ns(&tv);
> + return k;
>  }
>  
>  static inline int64_t
> @@ -67,70 +65,66 @@ ktime_get_raw_ns(void)
>  }
>  
>  static inline struct timespec64
> -ktime_to_timespec64(struct timeval tv)
> +ktime_to_timespec64(ktime_t k)
>  {
>   struct timespec64 ts;
> - ts.tv_sec = tv.tv_sec;
> - ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC;
> + ts.tv_sec = k / NSEC_PER_SEC;
> + ts.tv_nsec = k % NSEC_PER_SEC;
>   return ts;
>  }
>  
> -static inline struct timeval
> -ktime_sub(struct timeval a, struct timeval b)
> +static inline ktime_t
> +ktime_sub(ktime_t a, ktime_t b)
>  {
> - struct timeval res;
> - timersub(&a, &b, &res);
> - return res;
> + return a - b;
>  }
>  
> -static inline struct timeval
> -ktime_add(struct timeval a, struct timeval b)
> +static inline ktime_t
> +ktime_add(ktime_t a, ktime_t b)
>  {
> - struct timeval res;
> - timeradd(&a, &b, &res);
> - return res;
> + return a + b;
>  }
>  
> -static inline struct timeval
> -ktime_add_us(struct timeval tv, int64_t us)
> +static inline ktime_t
> +ktime_add_us(ktime_t k, uint64_t us)
>  {
> - return ns_to_timeval(timeval_to_ns(&tv) + (us * NSEC_PER_USEC));
> + return k + (us * NSEC_PER_USEC);
>  }
>  
> -static inline struct timeval
> -ktime_add_ns(struct timeval tv, int64_t ns)
> +static inline ktime_t
> +ktime_add_ns(ktime_t k, int64_t ns)
>  {
> - return ns_to_timeval(timeval_to_ns(&tv) + ns);
> + return k + ns;
>  }
>  
> -static inline struct timeval
> -ktime_sub_ns(struct timeval tv, int64_t ns)
> +static inline ktime_t
> +ktime_sub_ns(ktime_t k, int64_t ns)
>  {
> - return ns_to_timeval(timeval_to_ns(&tv) - ns);
> + return k - ns;
>  }
>  
>  static inline int64_t
> -ktime_us_delta(struct timeval a, struct timeval b)
> +ktime_us_delta(ktime_t a, ktime_t b)
>  {
>   return ktime_to_us(ktime_sub(a, b));
>  }
>  
>  static inline int64_t
> -ktime_ms_delta(struct timeval a, struct timeval b)
> +ktime_ms_delta(ktime_t a, ktime_t b)
>  {
>   return ktime_to_ms(ktime_sub(a, b));
>  }
>  
>  static 

  1   2   3   4   5   6   7   8   9   10   >