> Date: Tue, 25 Mar 2025 15:11:09 +0100
> From: Martin Pieuchot <m...@grenadille.net>
> 
> David Higgs recently reported an incorrect usage of uvm_map_protect(9):
>   https://marc.info/?l=openbsd-tech&m=174001772620750&w=2
> 
> It turns out there's another one in exec_sigcode_map(), fixed by the
> diff below.  Currently the uvm_map_protect(9) calls has no effect and
> returns EINVAL.
> 
> However, on amd64, with the diff applied the kernel faults when writing
> to curproc.  In the trace below tatclock+0x108 corresponds to
> tu_enter(&p->p_tu) in statclock().
> 
> panic(ffffffff81d89ec1) at panic+0xdd
> witness_checkorder(ffffffff820236d0,1,0) at witness_checkorder+0x8ae
> rw_do_enter_read(ffffffff820236b8,0) at rw_do_enter_read+0x4c
> uvmfault_lookup(ffff800003aa1810,0) at uvmfault_lookup+0x8a
> uvm_fault_check(ffff800003aa1810,ffff800003aa1848,ffff800003aa1878,0) at 
> uvm_fa
> ult_check+0x38
> uvm_fault(ffffffff820235d0,ffff8000fffff000,0,2) at uvm_fault+0xed
> kpageflttrap(ffff800003aa1990,ffff8000fffff8a8) at kpageflttrap+0x158
> kerntrap(ffff800003aa1990) at kerntrap+0xaf
> alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
> statclock(ffffffff81fd8dc8,ffff800003aa1bb0,0) at statclock+0x108
> clockintr_dispatch(ffff800003aa1bb0) at clockintr_dispatch+0x249
> clockintr(ffff800003aa1bb0) at clockintr+0x59
> intr_handler(ffff800003aa1bb0,ffff80000008fe80) at intr_handler+0x91
> Xintr_legacy0_untramp() at Xintr_legacy0_untramp+0x1a3
> pmap_write_protect(ffffffff82022a20,ffff800003b14000,ffff800003b15000,1) at 
> pma
> p_write_protect+0x13f
> uvm_map_protect(ffffffff820235d0,ffff800003b14000,ffff800003b15000,1,0,0,56f497
> 690e6c544a) at uvm_map_protect+0x47a
> exec_sigcode_map(ffff800003aa7288) at exec_sigcode_map+0x22e
> sys_execve(ffff8000fffff710,ffff800003aa2330,ffff800003aa23e0) at 
> sys_execve+0x
> 11db       
> start_init(ffff8000fffff710) at start_init+0x2e8
> 
> On arm64 the kernel works fine with this diff.
> 
> Any idea of what is happening?

Not really.  But this is starting init, which has always been a bit
weird.  And I'm not really sure it makes sense for this initial exec
of init to be interrupted by a clock interrupt that runs statclock().

> Index: kern/exec_subr.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/exec_subr.c,v
> diff -u -p -r1.68 exec_subr.c
> --- kern/exec_subr.c  2 Nov 2024 10:02:23 -0000       1.68
> +++ kern/exec_subr.c  10 Mar 2025 19:26:50 -0000
> @@ -260,7 +260,7 @@ vmcmd_map_readvn(struct proc *p, struct 
>                * uvm_map_protect() to fix up the protection.  ICK.
>                */
>               error = (uvm_map_protect(&p->p_vmspace->vm_map,
> -                 cmd->ev_addr, round_page(cmd->ev_len),
> +                 cmd->ev_addr, round_page(cmd->ev_addr + cmd->ev_len),
>                   prot, 0, FALSE, TRUE));
>       }
>       if (error == 0) {
> Index: kern/kern_exec.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_exec.c,v
> diff -u -p -r1.262 kern_exec.c
> --- kern/kern_exec.c  17 Feb 2025 10:07:10 -0000      1.262
> +++ kern/kern_exec.c  25 Mar 2025 13:56:34 -0000
> @@ -874,12 +874,11 @@ exec_sigcode_map(struct process *pr)
>               int r;
>  
>               sigobject = uao_create(sz, 0);
> -             uao_reference(sigobject);       /* permanent reference */
> -
>               if ((r = uvm_map(kernel_map, &va, round_page(sz), sigobject,
>                   0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | 
> PROT_WRITE,
>                   MAP_INHERIT_SHARE, MADV_RANDOM, 0)))) {
>                       uao_detach(sigobject);
> +                     sigobject = NULL;
>                       return (ENOMEM);
>               }
>  
> @@ -890,9 +889,13 @@ exec_sigcode_map(struct process *pr)
>                       left -= chunk;
>               }
>               memcpy((caddr_t)va, sigcode, sz);
> -
> -             (void) uvm_map_protect(kernel_map, va, round_page(sz),
> +             r = uvm_map_protect(kernel_map, va, round_page(va + sz),
>                   PROT_READ, 0, FALSE, FALSE);
> +             if (r) {
> +                     uao_detach(sigobject);
> +                     sigobject = NULL;
> +                     return (r);
> +             }
>               sigcode_va = va;
>               sigcode_sz = round_page(sz);
>       }
> @@ -927,8 +930,6 @@ exec_timekeep_map(struct process *pr)
>               vaddr_t va = 0;
>  
>               timekeep_object = uao_create(timekeep_sz, 0);
> -             uao_reference(timekeep_object);
> -
>               if (uvm_map(kernel_map, &va, timekeep_sz, timekeep_object,
>                   0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | 
> PROT_WRITE,
>                   MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
> 
> 
> 

Reply via email to