Re: Patch: Sending credentials over Unix datagram sockets
GMail seems determined to ruin the formatting. Diff is now an attachment instead. -Original Message- From: David Mackay Sent: 18 March 2020 03:24 To: 'tech@openbsd.org' Subject: RE: Patch: Sending credentials over Unix datagram sockets Sorry, looks like my mail client mangled the previous message. I've reproduced the message below and made the diff an attachment in case it happens again. Dear openbsd-tech, On GNU HURD and FreeBSD, the control message SCM_CREDS may be allocated by a client of a Unix datagram socket. When the kernel encounters this, it fills out a struct cmsgcred containing PID, UID, GID, effective UID, and effective GIDs of the sender. This patch implements this for OpenBSD. Kind regards, David -Original Message- From: David Mackay Sent: 18 March 2020 02:53 To: 'tech@openbsd.org' Subject: Patch: Sending credentials over Unix datagram sockets Dear openbsd-tech, On GNU HURD and FreeBSD, the control message SCM_CREDS may be allocated by a client of a Unix datagram socket. When the kernel encounters this, it fills out a struct cmsgcred containing PID, UID, GID, effective UID, and effective GIDs of the sender. This patch implements this for OpenBSD. Kind regards, David diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c index 8a735f3..7df3846 100644 --- sys/kern/uipc_usrreq.c +++ sys/kern/uipc_usrreq.c @@ -688,9 +688,10 @@ unp_externalize(struct mbuf *rights, socklen_t controllen, int flags) /* * This code only works because SCM_RIGHTS is the only supported * control message type on unix sockets. Enforce this here. +* If it isn't, then return. */ if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET) - return EINVAL; + return (0); nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(struct fdpass); @@ -831,10 +832,26 @@ unp_internalize(struct mbuf *control, struct proc *p) */ if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0)) return (EINVAL); - if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || - !(cm->cmsg_len == control->m_len || + if (!(cm->cmsg_type == SCM_RIGHTS || cm->cmsg_type == SCM_CREDS) || + cm->cmsg_level != SOL_SOCKET || !(cm->cmsg_len == control->m_len || control->m_len == CMSG_ALIGN(cm->cmsg_len))) return (EINVAL); + + if(cm->cmsg_type == SCM_CREDS) { + struct cmsgcred * cmcred = (struct cmsgcred *)CMSG_DATA(cm); + + cmcred->cmcred_pid = p->p_p->ps_pid; + cmcred->cmcred_uid = p->p_ucred->cr_ruid; + cmcred->cmcred_euid = p->p_ucred->cr_uid; + cmcred->cmcred_gid = p->p_ucred->cr_rgid; + cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, + CMGROUP_MAX); + for (i = 0; i < cmcred->cmcred_ngroups; i++) + cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; + + return (0); + } + nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int); if (unp_rights + nfds > maxfiles / 10) diff --git sys/sys/socket.h sys/sys/socket.h index 927fd26..75c2319 100644 --- sys/sys/socket.h +++ sys/sys/socket.h @@ -486,6 +486,26 @@ struct cmsghdr { /* followed by u_char cmsg_data[]; */ }; +/* + * Only 16 groups are sent because struct mbuf is a fixed size; these must fit. + */ +#define CMGROUP_MAX 16 + +/* + * Credentials as attached to control messages of type SCM_CREDS. + * The sender must specify this control message, but the kernel + * will overwrite anything the sender put in the actual structure + * with the real values. + */ +struct cmsgcred { + pid_t cmcred_pid; /* sender PID */ + uid_t cmcred_uid; /* sender real UID */ + uid_t cmcred_euid;/* sender effective UID */ + gid_t cmcred_gid; /* sender real GID*/ + short cmcred_ngroups; /* number of GIDs */ + gid_t cmcred_groups[CMGROUP_MAX]; /* GIDs */ +} + /* given pointer to struct cmsghdr, return pointer to data */ #defineCMSG_DATA(cmsg) \ ((unsigned char *)(cmsg) + _ALIGN(sizeof(struct cmsghdr))) @@ -520,6 +540,7 @@ struct cmsghdr { /* "Socket"-level control message types: */ #defineSCM_RIGHTS 0x01/* access rights (array of int) */ +#defineSCM_CREDS 0x02/* sender credentials (struct cmsgcred) */ #defineSCM_TIMESTAMP 0x04/* timestamp (struct timeval) */ #ifndef _KERNEL creds.patch Description: Binary data
Re: Patch: Sending credentials over Unix datagram sockets
Sorry, looks like my mail client mangled the previous message. I've reproduced the message below and made the diff an attachment in case it happens again. Dear openbsd-tech, On GNU HURD and FreeBSD, the control message SCM_CREDS may be allocated by a client of a Unix datagram socket. When the kernel encounters this, it fills out a struct cmsgcred containing PID, UID, GID, effective UID, and effective GIDs of the sender. This patch implements this for OpenBSD. Kind regards, David -Original Message- From: David Mackay Sent: 18 March 2020 02:53 To: 'tech@openbsd.org' Subject: Patch: Sending credentials over Unix datagram sockets Dear openbsd-tech, On GNU HURD and FreeBSD, the control message SCM_CREDS may be allocated by a client of a Unix datagram socket. When the kernel encounters this, it fills out a struct cmsgcred containing PID, UID, GID, effective UID, and effective GIDs of the sender. This patch implements this for OpenBSD. Kind regards, David diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c index 8a735f3..7df3846 100644 --- sys/kern/uipc_usrreq.c +++ sys/kern/uipc_usrreq.c @@ -688,9 +688,10 @@ unp_externalize(struct mbuf *rights, socklen_t controllen, int flags) /* * This code only works because SCM_RIGHTS is the only supported * control message type on unix sockets. Enforce this here. +* If it isn't, then return. */ if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET) - return EINVAL; + return (0); nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(struct fdpass); @@ -831,10 +832,26 @@ unp_internalize(struct mbuf *control, struct proc *p) */ if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0)) return (EINVAL); - if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || - !(cm->cmsg_len == control->m_len || + if (!(cm->cmsg_type == SCM_RIGHTS || cm->cmsg_type == SCM_CREDS) || + cm->cmsg_level != SOL_SOCKET || !(cm->cmsg_len == control->m_len || control->m_len == CMSG_ALIGN(cm->cmsg_len))) return (EINVAL); + + if(cm->cmsg_type == SCM_CREDS) { + struct cmsgcred * cmcred = (struct cmsgcred *)CMSG_DATA(cm); + + cmcred->cmcred_pid = p->p_p->ps_pid; + cmcred->cmcred_uid = p->p_ucred->cr_ruid; + cmcred->cmcred_euid = p->p_ucred->cr_uid; + cmcred->cmcred_gid = p->p_ucred->cr_rgid; + cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, + CMGROUP_MAX); + for (i = 0; i < cmcred->cmcred_ngroups; i++) + cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; + + return (0); + } + nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int); if (unp_rights + nfds > maxfiles / 10) diff --git sys/sys/socket.h sys/sys/socket.h index 927fd26..75c2319 100644 --- sys/sys/socket.h +++ sys/sys/socket.h @@ -486,6 +486,26 @@ struct cmsghdr { /* followed by u_char cmsg_data[]; */ }; +/* + * Only 16 groups are sent because struct mbuf is a fixed size; these must fit. + */ +#define CMGROUP_MAX 16 + +/* + * Credentials as attached to control messages of type SCM_CREDS. + * The sender must specify this control message, but the kernel + * will overwrite anything the sender put in the actual structure + * with the real values. + */ +struct cmsgcred { + pid_t cmcred_pid; /* sender PID */ + uid_t cmcred_uid; /* sender real UID */ + uid_t cmcred_euid;/* sender effective UID */ + gid_t cmcred_gid; /* sender real GID*/ + short cmcred_ngroups; /* number of GIDs */ + gid_t cmcred_groups[CMGROUP_MAX]; /* GIDs */ +} + /* given pointer to struct cmsghdr, return pointer to data */ #defineCMSG_DATA(cmsg) \ ((unsigned char *)(cmsg) + _ALIGN(sizeof(struct cmsghdr))) @@ -520,6 +540,7 @@ struct cmsghdr { /* "Socket"-level control message types: */ #defineSCM_RIGHTS 0x01/* access rights (array of int) */ +#defineSCM_CREDS 0x02/* sender credentials (struct cmsgcred) */ #defineSCM_TIMESTAMP 0x04/* timestamp (struct timeval) */ #ifndef _KERNEL
Patch: Sending credentials over Unix datagram sockets
Dear openbsd-tech, On GNU HURD and FreeBSD, the control message SCM_CREDS may be allocated by a client of a Unix datagram socket. When the kernel encounters this, it fills out a struct cmsgcred containing PID, UID, GID, effective UID, and effective GIDs of the sender. This patch implements this for OpenBSD. Kind regards, David diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c index 8a735f3..7df3846 100644 --- sys/kern/uipc_usrreq.c +++ sys/kern/uipc_usrreq.c @@ -688,9 +688,10 @@ unp_externalize(struct mbuf *rights, socklen_t controllen, int flags) /* * This code only works because SCM_RIGHTS is the only supported * control message type on unix sockets. Enforce this here. +* If it isn't, then return. */ if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET) - return EINVAL; + return (0); nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(struct fdpass); @@ -831,10 +832,26 @@ unp_internalize(struct mbuf *control, struct proc *p) */ if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0)) return (EINVAL); - if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || - !(cm->cmsg_len == control->m_len || + if (!(cm->cmsg_type == SCM_RIGHTS || cm->cmsg_type == SCM_CREDS) || + cm->cmsg_level != SOL_SOCKET || !(cm->cmsg_len == control->m_len || control->m_len == CMSG_ALIGN(cm->cmsg_len))) return (EINVAL); + + if(cm->cmsg_type == SCM_CREDS) { + struct cmsgcred * cmcred = (struct cmsgcred *)CMSG_DATA(cm); + + cmcred->cmcred_pid = p->p_p->ps_pid; + cmcred->cmcred_uid = p->p_ucred->cr_ruid; + cmcred->cmcred_euid = p->p_ucred->cr_uid; + cmcred->cmcred_gid = p->p_ucred->cr_rgid; + cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, + CMGROUP_MAX); + for (i = 0; i < cmcred->cmcred_ngroups; i++) + cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; + + return (0); + } + nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int); if (unp_rights + nfds > maxfiles / 10) diff --git sys/sys/socket.h sys/sys/socket.h index 927fd26..75c2319 100644 --- sys/sys/socket.h +++ sys/sys/socket.h @@ -486,6 +486,26 @@ struct cmsghdr { /* followed by u_char cmsg_data[]; */ }; +/* + * Only 16 groups are sent because struct mbuf is a fixed size; these must fit. + */ +#define CMGROUP_MAX 16 + +/* + * Credentials as attached to control messages of type SCM_CREDS. + * The sender must specify this control message, but the kernel + * will overwrite anything the sender put in the actual structure + * with the real values. + */ +struct cmsgcred { + pid_t cmcred_pid; /* sender PID */ + uid_t cmcred_uid; /* sender real UID */ + uid_t cmcred_euid;/* sender effective UID */ + gid_t cmcred_gid; /* sender real GID*/ + short cmcred_ngroups; /* number of GIDs */ + gid_t cmcred_groups[CMGROUP_MAX]; /* GIDs */ +} + /* given pointer to struct cmsghdr, return pointer to data */ #defineCMSG_DATA(cmsg) \ ((unsigned char *)(cmsg) + _ALIGN(sizeof(struct cmsghdr))) @@ -520,6 +540,7 @@ struct cmsghdr { /* "Socket"-level control message types: */ #defineSCM_RIGHTS 0x01/* access rights (array of int) */ +#defineSCM_CREDS 0x02/* sender credentials (struct cmsgcred) */ #defineSCM_TIMESTAMP 0x04/* timestamp (struct timeval) */ #ifndef _KERNEL
Re: Regarding the understanding of the malloc(3) code
On Fri, Mar 13, 2020, at 11:45 AM Otto Moerbeek wrote: > > Please indent your code snippets. yeah, my apologies. I shall indent the code snippets. > > di_info is special. Having a guard page on both sides for regular > allocation can be done, but would waste more pages. Note that > allocations are already spread throughout the address space, so it is > very likely that an allocation is surrounded by unmapped pages. > Okay, So, as from omalloc_poolinit() code it is not there, but we can do but it will be wastage of one-page memory and also entire address space is spread with unmapped pages. So, it is very likely that there will some unmapped page beside dir_info in the memory. > > We need two pages to store dir_info. > > > > Now, MMAPNONE maps up to len (8192 + (4096 * 2)) = 16384 > > We allocate 4 pages prot none. > > > then, mprotecting the pages through p + MALLOC_PAGESIZE + DIR_INFO_RSZ - 1 > > the two middle pages are r/w. > > > d_avail = (8192 - 4824) >> 4 = 3368 >> 4 = 210 > > > > Now, d = (p + MALLOC_PAGESIZE + (random_no_under_210 << 4) > > di_info ends up on an aligned address somewhere in the middle pages on > an offset between 0 and (210<<4) = 0..3360, counting from the start of > the two middle pages. Thank you for the information. So, it is like [(p + MALLOC_PAGESIZE) + (0..3360)]. It is kind of like array, For example, let's suppose, x = p + MALLOC_PAGESIZE. So, it will be x[0..3360]. Am I right? > > MALLOC_CHUNK_LISTS could be increased at the cost of overhead. > MALLOC_MAXSHIFT cannot, it is the shift of the max chunk size that fits in > a page. Okay. I understood. And, yeah more randomization means more cost overhead. Thank you, Otto, for your detailed information. Please find the code below: 948 /* 949 * Allocate a chunk 950 */ 951 static void * 952 malloc_bytes(struct dir_info *d, size_t size, void *f) 953 { 954 u_int i, r; 955 int j, listnum; 956 size_t k; 957 u_short *lp; 958 struct chunk_info *bp; 959 void *p; 960 961 if (mopts.malloc_canary != (d->canary1 ^ (u_int32_t)(uintptr_t)d) || 962 d->canary1 != ~d->canary2) 963 wrterror(d, "internal struct corrupt"); 964 965 j = find_chunksize(size); 966 967 r = ((u_int)getrbyte(d) << 8) | getrbyte(d); 968 listnum = r % MALLOC_CHUNK_LISTS; 969 /* If it's empty, make a page more of that size chunks */ 970 if ((bp = LIST_FIRST(>chunk_dir[j][listnum])) == NULL) { 971 bp = omalloc_make_chunks(d, j, listnum); 972 if (bp == NULL) 973 return NULL; 974 } 975 976 if (bp->canary != (u_short)d->canary1) 977 wrterror(d, "chunk info corrupted"); Here, in the code mentioned above, we can see that on line 961 and line 976. I don't understand why it is checking for canaries of malloc_readonly with d and then allocated chunk bp with d, because I have seen that validation of canary happens in free(3) not in malloc(3). So, it is like there may be some cases where one can corrupt these also?? 978 979 i = (r / MALLOC_CHUNK_LISTS) & (bp->total - 1); 980 981 /* start somewhere in a short */ 982 lp = >bits[i / MALLOC_BITS]; 983 if (*lp) { 984 j = i % MALLOC_BITS; 985 k = ffs(*lp >> j); 986 if (k != 0) { 987 k += j - 1; 988 goto found; 989 } 990 } 991 /* no bit halfway, go to next full short */ 992 i /= MALLOC_BITS; 993 for (;;) { 994 if (++i >= bp->total / MALLOC_BITS) 995 i = 0; 996 lp = >bits[i]; 997 if (*lp) { 998 k = ffs(*lp) - 1; 999 break; 1000} 1001} 1002found: 1003#ifdef MALLOC_STATS 1004if (i == 0 && k == 0) { 1005struct region_info *r = find(d, bp->page); 1006r->f = f; 1007} 1008#endif 1009 1010*lp ^= 1 << k; 1011 1012/* If there are no more free, remove from free-list */ 1013if (--bp->free == 0) 1014LIST_REMOVE(bp, entries); 1015 1016/* Adjust to the real offset of that chunk */ 1017k += (lp - bp->bits) * MALLOC_BITS; 1018 1019if (mopts.chunk_canaries && size > 0) 1020bp->bits[bp->offset + k] = size; 1021 1022k <<= bp->shift; 1023 1024p = (char *)bp->page + k; 1025if (bp->size > 0) { 1026if (d->malloc_junk == 2) 1027
Re: [PLEASE TEST] acpi(4): acpi_sleep(): tsleep(9) -> tsleep_nsec(9)
On Sun, Mar 15, 2020, at 21:55, Scott Cheloha wrote: > Hi, > > This is a straightforward ticks-to-milliseconds conversion, but IIRC > pirofti@ wanted me to get some tests before committing it. > > The only users of acpi_sleep() are (a) acpitz(4) and (b) any AML code > that uses AMLOP_SLEEP. AMLOP_SLEEP seems to trigger just before a > suspend. I don't know when else it is used. > > If you have an acpi(4) laptop with suspend/resume support, please > apply this patch and let me know if anything doesn't work, > particularly with suspend/resume. > > I've been running with this since November without apparent issue... > But this driver impacts many people, so I think I need more tests. > > Cheers, > > Scott > No problems so far on a Thinkpad X1C7 Lucas
vmm Linux guest crashes frequently
hello, i am running vmd on current and one of the guests I have is an Ubuntu 18.04.4 LTS. I have two problems, first this guest will crash frequently and second when restarting this guest it will often crash on restart. Starting with the second issue first, when the guest has crashed and is started back up it will sometimes crash, but only during fsck, this makes me suspect disk I/O being the culprit. The output I get when doing 'vmctl start -c 2' is: [0.00] ACPI BIOS Error (bug): A valid RSDP was not found (20170831/tbxfroot-244) a command-line. [1.096305] mce: Unable to init MCE device (rc: -5) /dev/vda1: recovering journal /dev/vda1: clean, 291619/1966080 files, 4637993/7863808 blocks [EOT] I've attached the the full log displayed during startup (with VMM_DEBUG) the one thing I noticed was this seems to happen just before each crash: Mar 17 11:08:40 absu /bsd: vmx_handle_wrmsr: wrmsr exit, msr=0xc90, discarding data written from guest=0x0:0xf Mar 17 11:08:40 absu /bsd: vmx_handle_rdmsr: rdmsr exit, msr=0xc90, data returned to guest=0x0:0x0 Mar 17 11:08:40 absu vmd[74219]: vcpu_process_com_data: guest reading com1 when not ready Mar 17 11:08:40 absu vmd[74219]: vcpu_process_com_data: guest reading com1 when not ready Mar 17 11:08:40 absu /bsd: vmx_handle_rdmsr: rdmsr exit, msr=0x3b, data returned to guest=0x0:0x0 Mar 17 11:08:40 absu vmd[74219]: vioblk_notifyq: unsupported command 0x8 Mar 17 11:08:40 absu vmd[74219]: vioblk_notifyq: unsupported command 0x8 Mar 17 11:08:42 absu vmd[74219]: vmd: vm 3 event thread exited unexpectedly Mar 17 11:08:42 absu /bsd: vmm_free_vpid: freed VPID/ASID 2 Generally after a few attempts it will get passed this and then start up normally. The issue of the guest crashing is harder to pin down, it's random, I've gone days with everything running fine, sometimes it crashes multiple times a day. Most common is about once a day or so. When it did crash even with VMM_DEBUG I didn't see much: Mar 17 14:27:13 absu /bsd: vmx_handle_rdmsr: rdmsr exit, msr=0x3b, data returned to guest=0x0:0x0 Mar 17 14:27:46 absu last message repeated 6 times Mar 17 14:29:46 absu last message repeated 22 times Mar 17 14:39:44 absu last message repeated 109 times Mar 17 14:49:46 absu last message repeated 110 times Mar 17 14:56:53 absu last message repeated 78 times Mar 17 15:09:45 absu last message repeated 63 times Mar 17 15:19:46 absu last message repeated 78 times Mar 17 15:29:48 absu last message repeated 110 times Mar 17 15:39:50 absu last message repeated 110 times Mar 17 15:49:47 absu last message repeated 109 times Mar 17 15:59:49 absu last message repeated 110 times Mar 17 16:09:49 absu last message repeated 109 times Mar 17 16:19:49 absu last message repeated 74 times Mar 17 16:29:45 absu last message repeated 107 times Mar 17 16:39:47 absu last message repeated 109 times Mar 17 16:49:51 absu last message repeated 110 times Mar 17 16:59:47 absu last message repeated 108 times Mar 17 17:00:59 absu last message repeated 12 times Mar 17 17:01:20 absu vmd[55361]: vmd: vm 5 event thread exited unexpectedly Mar 17 17:01:21 absu /bsd: vmm_free_vpid: freed VPID/ASID 2 any hints on further debugging this? thanks, .jh Mar 17 11:07:59 absu /bsd: vm_impl_init_vmx: created vm_map @ 0xfd9db272edd8 Mar 17 11:08:00 absu /bsd: vm_resetcpu: resetting vm 3 vcpu 0 to power on defaults Mar 17 11:08:00 absu /bsd: Guest EPTP = 0x207da5c01e Mar 17 11:08:00 absu /bsd: vmm_alloc_vpid: allocated VPID/ASID 2 Mar 17 11:08:04 absu /bsd: vmx_handle_cr: mov to cr0 @ b4e0008b, data=0x80050033 Mar 17 11:08:04 absu /bsd: vmm_handle_cpuid: function 0x06 (thermal/power mgt) not supported Mar 17 11:08:04 absu /bsd: vmm_handle_cpuid: function 0x0f (QoS info) not supported Mar 17 11:08:04 absu /bsd: vmm_handle_cpuid: function 0x06 (thermal/power mgt) not supported Mar 17 11:08:04 absu /bsd: vmm_handle_cpuid: function 0x06 (thermal/power mgt) not supported Mar 17 11:08:04 absu /bsd: vmm_handle_cpuid: invalid cpuid input leaf 0x10, guest rip=0xb4e6cbbf - resetting to 0xf Mar 17 11:08:04 absu /bsd: vmm_handle_cpuid: function 0x0f (QoS info) not supported Mar 17 11:08:05 absu /bsd: vmm_handle_cpuid: invalid cpuid input leaf 0x10, guest rip=0xb4e6cbbf - resetting to 0xf Mar 17 11:08:05 absu /bsd: vmm_handle_cpuid: function 0x0f (QoS info) not supported Mar 17 11:08:05 absu /bsd: vmm_handle_cpuid: invalid cpuid input leaf 0x10, guest rip=0xb4e6cbbf - resetting to 0xf Mar 17 11:08:05 absu /bsd: vmm_handle_cpuid: function 0x0f (QoS info) not supported Mar 17 11:08:05 absu /bsd: vmm_handle_cpuid: invalid cpuid input leaf 0x10, guest rip=0xb4e6cbbf - resetting to 0xf Mar 17 11:08:05 absu /bsd: vmm_handle_cpuid: function 0x0f (QoS info) not supported Mar 17 11:08:05 absu /bsd: vmm_handle_cpuid: invalid cpuid input leaf 0x10, guest
Re: ldapd: fix return values for illegal passwords
ping... On Sun, 8 Mar 2020 12:18:39 +0100 Robert Klein wrote: > Hi, > > I thought a bit more about using LDAP resultCode values and I think > some intermediate values are needed so it is clearer what happens. > > Also, I found out a clients connection hangs in the "Database is being > reopened" case in ldap_auth_simple(). > > Below is a patch proposal that > > 1. patches the original issue, so ldapd returns "invalid credentials" >instead of "operations error" where appropriate. > 2. use private "result Code" values for intermediate results (see also > > https://www.iana.org/assignments/ldap-parameters/ldap-parameters.xhtml#ldap-parameters-6): >- LDAP_X_HANDLES_BY_LDAPE_IMSGEV for SASL binds and {BSDAUTH} style > passwords where the actual answer is provided via IMSGEV >- LDAP_X_PASSWORD_OK in check_password() because there's still some > work left until it can be called LDAP_SUCCESS. > 3. returns LDAP_BUSY for "Database is being reopened": the old code > did not lead to a "return ldap_respond" in bind_ldap(), so nothing is >returned to the client which is trying to bind and the client > hangs. > > > Best regards > Robert > > > diff 403185e43a653dece6518a28d0750f212ff40fc5 /usr/src > blob - f6f542e4e4a0af0b815643d9f622913a46707709 > file + usr.sbin/ldapd/aldap.h > --- usr.sbin/ldapd/aldap.h > +++ usr.sbin/ldapd/aldap.h > @@ -109,6 +109,8 @@ enum result_code { > LDAP_AFFECTS_MULTIPLE_DSAS = 71, > > LDAP_OTHER = 80, > + LDAP_X_PASSWORD_OK = 16384, > + LDAP_X_HANDLED_BY_LDAPE_IMSGEV = 16385, > }; > > enum filter { > blob - f8debff7a2d6a0feb9cf984905de385b029b8744 > file + usr.sbin/ldapd/auth.c > --- usr.sbin/ldapd/auth.c > +++ usr.sbin/ldapd/auth.c > @@ -179,33 +179,35 @@ send_auth_request(struct request *req, const > char *use const char *password) > { > struct auth_req auth_req; > + int ret = LDAP_X_HANDLED_BY_LDAPE_IMSGEV; > > memset(_req, 0, sizeof(auth_req)); > if (strlcpy(auth_req.name, username, > - sizeof(auth_req.name)) >= sizeof(auth_req.name)) > + sizeof(auth_req.name)) >= sizeof(auth_req.name)) { > + ret = LDAP_INVALID_CREDENTIALS; > goto fail; > + } > if (strlcpy(auth_req.password, password, > - sizeof(auth_req.password)) >= sizeof(auth_req.password)) > + sizeof(auth_req.password)) >= sizeof(auth_req.password)) > { > + ret = LDAP_INVALID_CREDENTIALS; > goto fail; > + } > auth_req.fd = req->conn->fd; > auth_req.msgid = req->msgid; > > if (imsgev_compose(iev_ldapd, IMSG_LDAPD_AUTH, 0, 0, -1, > _req, > - sizeof(auth_req)) == -1) > + sizeof(auth_req)) == -1) { > + ret = LDAP_OTHER; > goto fail; > + } > > req->conn->bind_req = req; > - return 0; > > fail: > memset(_req, 0, sizeof(auth_req)); > - return -1; > + return ret; > } > > -/* > - * Check password. Returns 1 if password matches, 2 if password > matching > - * is in progress, 0 on mismatch and -1 on error. > - */ > static int > check_password(struct request *req, const char *stored_passwd, > const char *passwd) > @@ -218,37 +220,39 @@ check_password(struct request *req, const char > *stored intsz; > > if (stored_passwd == NULL) > - return -1; > + return LDAP_INVALID_CREDENTIALS; > > if (strncmp(stored_passwd, "{SHA}", 5) == 0) { > sz = b64_pton(stored_passwd + 5, tmp, sizeof(tmp)); > if (sz != SHA_DIGEST_LENGTH) > - return (-1); > + return (LDAP_INVALID_CREDENTIALS); > SHA1_Init(); > SHA1_Update(, passwd, strlen(passwd)); > SHA1_Final(md, ); > - return (bcmp(md, tmp, SHA_DIGEST_LENGTH) == 0 ? 1 : > 0); > + return (bcmp(md, tmp, SHA_DIGEST_LENGTH) == 0 ? > + LDAP_X_PASSWORD_OK : LDAP_INVALID_CREDENTIALS); > } else if (strncmp(stored_passwd, "{SSHA}", 6) == 0) { > sz = b64_pton(stored_passwd + 6, tmp, sizeof(tmp)); > if (sz <= SHA_DIGEST_LENGTH) > - return (-1); > + return (LDAP_INVALID_CREDENTIALS); > salt = tmp + SHA_DIGEST_LENGTH; > SHA1_Init(); > SHA1_Update(, passwd, strlen(passwd)); > SHA1_Update(, salt, sz - SHA_DIGEST_LENGTH); > SHA1_Final(md, ); > - return (bcmp(md, tmp, SHA_DIGEST_LENGTH) == 0 ? 1 : > 0); > + return (bcmp(md, tmp, SHA_DIGEST_LENGTH) == 0 ? > + LDAP_X_PASSWORD_OK : LDAP_INVALID_CREDENTIALS); > } else if (strncmp(stored_passwd, "{CRYPT}", 7) == 0) { > encpw = crypt(passwd, stored_passwd + 7); > if (encpw == NULL) > -
ikectl(8): Reset SAs by policy ID
Hi, this diff adds a new command to ikectl(8) and iked(8) that allows to reset SAs based on the peers ID, which is equivalent to resetting a single policy. The expected ID format is the same as printed by 'ipsecctl -sf' in the 'dstid' field. Example: $ ikectl reset id FQDN/peer1 ok? diff --git a/sbin/iked/control.c b/sbin/iked/control.c index 67466c6a555..42b14e261e1 100644 --- a/sbin/iked/control.c +++ b/sbin/iked/control.c @@ -50,7 +50,8 @@ void control_imsg_forward(struct imsg *); voidcontrol_run(struct privsep *, struct privsep_proc *, void *); static struct privsep_proc procs[] = { - { "parent", PROC_PARENT, NULL } + { "parent", PROC_PARENT, NULL }, + { "ikev2", PROC_IKEV2, NULL } }; pid_t @@ -305,6 +306,9 @@ control_dispatch_imsg(int fd, short event, void *arg) case IMSG_CTL_PASSIVE: proc_forward_imsg(>sc_ps, , PROC_PARENT, -1); break; + case IMSG_CTL_RESET_ID: + proc_forward_imsg(>sc_ps, , PROC_IKEV2, -1); + break; default: log_debug("%s: error handling imsg %d", __func__, imsg.hdr.type); diff --git a/sbin/iked/ikev2.c b/sbin/iked/ikev2.c index 2dd2b2c648d..ebe0adce0e0 100644 --- a/sbin/iked/ikev2.c +++ b/sbin/iked/ikev2.c @@ -49,6 +49,7 @@ voidikev2_run(struct privsep *, struct privsep_proc *, void *); int ikev2_dispatch_parent(int, struct privsep_proc *, struct imsg *); int ikev2_dispatch_cert(int, struct privsep_proc *, struct imsg *); +int ikev2_dispatch_control(int, struct privsep_proc *, struct imsg *); struct iked_sa * ikev2_getimsgdata(struct iked *, struct imsg *, struct iked_sahdr *, @@ -155,10 +156,12 @@ intikev2_update_sa_addresses(struct iked *, struct iked_sa *); int ikev2_resp_informational(struct iked *, struct iked_sa *, struct iked_message *); +void ikev2_ctl_reset_id(struct iked *, struct imsg *, unsigned int); static struct privsep_proc procs[] = { { "parent", PROC_PARENT,ikev2_dispatch_parent }, - { "certstore", PROC_CERT, ikev2_dispatch_cert } + { "certstore", PROC_CERT, ikev2_dispatch_cert }, + { "control",PROC_CONTROL, ikev2_dispatch_control } }; pid_t @@ -376,6 +379,22 @@ ikev2_dispatch_cert(int fd, struct privsep_proc *p, struct imsg *imsg) return (0); } +int +ikev2_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg) +{ + struct iked *env = p->p_env; + + switch (imsg->hdr.type) { + case IMSG_CTL_RESET_ID: + ikev2_ctl_reset_id(env, imsg, imsg->hdr.type); + break; + default: + return (-1); + } + + return (0); +} + const char * ikev2_ikesa_info(uint64_t spi, const char *msg) { @@ -390,6 +409,38 @@ ikev2_ikesa_info(uint64_t spi, const char *msg) return buf; } +void +ikev2_ctl_reset_id(struct iked *env, struct imsg *imsg, unsigned int type) +{ + struct iked_sa *sa; + char*reset_id = NULL; + char sa_id[IKED_ID_SIZE]; + + if ((reset_id = get_string(imsg->data, IMSG_DATA_SIZE(imsg))) == NULL) + return; + + log_debug("%s: %s %d", __func__, reset_id, type); + RB_FOREACH(sa, iked_sas, >sc_sas) { + if (ikev2_print_id(IKESA_DSTID(sa), sa_id, sizeof(sa_id)) == -1) + continue; + if (strcmp(reset_id, sa_id) != 0) + continue; + if (sa->sa_state == IKEV2_STATE_CLOSED) + continue; + if (sa->sa_state == IKEV2_STATE_ESTABLISHED) + ikev2_disable_timer(env, sa); + log_info("%s: IKE SA %p id %s ispi %s rspi %s", __func__, + sa, sa_id, + print_spi(sa->sa_hdr.sh_ispi, 8), + print_spi(sa->sa_hdr.sh_rspi, 8)); + ikev2_ikesa_delete(env, sa, 1); + /* default IKED_IKE_SA_DELETE_TIMEOUT is 120s, so switch to 6s */ + timer_add(env, >sa_timer, 3 * IKED_RETRANSMIT_TIMEOUT); + } + free(reset_id); +} + + struct iked_sa * ikev2_getimsgdata(struct iked *env, struct imsg *imsg, struct iked_sahdr *sh, uint8_t *type, uint8_t **buf, size_t *size) diff --git a/sbin/iked/types.h b/sbin/iked/types.h index e8881a74e9a..07cec6a5902 100644 --- a/sbin/iked/types.h +++ b/sbin/iked/types.h @@ -105,6 +105,7 @@ enum imsg_type { IMSG_CTL_MOBIKE, IMSG_CTL_FRAGMENTATION, IMSG_CTL_NATTPORT, + IMSG_CTL_RESET_ID, IMSG_COMPILE, IMSG_UDP_SOCKET, IMSG_PFKEY_SOCKET, diff --git a/usr.sbin/ikectl/ikectl.8 b/usr.sbin/ikectl/ikectl.8 index 40d30ac0e21..9956b42b3c0 100644 --- a/usr.sbin/ikectl/ikectl.8 +++
uvm_map_inentry diff for testing
While playing with dt(4)/btrace(4) flamegraphs on a 32-core arm64 machine, I noticed that the kernel was spending a lot of time (6.84%) in uvm_map_inentry(). This is caused by kernel lock contention. Pushing baack the kernel lock further into the slow path reduces the time to 0.05%. Now mpi@ tried this before in sys/uvm/uvm_map.c rev 1.249. That was backed out because it caused issues with golang. So I had a look again and identified a potential issue. When we "fix" the cached entry, we use the serial number that has been passed down to us. But since that serial number was recorded before we grapped the vm_map lock, it may be stale. Instead we should be using the current map serial number. This makes me wonder whether having two serial numbers is a case of premature optimization. But that is a different discussion. I'd like to see this tested, especially by people running/building golang. Does anybody remember specific details of the issue? If it manifests itself, it should be pretty obvious as it results in a kernel printf and a killed process. Index: uvm/uvm_map.c === RCS file: /cvs/src/sys/uvm/uvm_map.c,v retrieving revision 1.263 diff -u -p -r1.263 uvm_map.c --- uvm/uvm_map.c 4 Mar 2020 21:15:38 - 1.263 +++ uvm/uvm_map.c 17 Mar 2020 18:57:19 - @@ -161,7 +161,7 @@ void uvm_map_addr_augment(struct vm_m int uvm_map_inentry_recheck(u_long, vaddr_t, struct p_inentry *); boolean_t uvm_map_inentry_fix(struct proc *, struct p_inentry *, -vaddr_t, int (*)(vm_map_entry_t), u_long); +vaddr_t, int (*)(vm_map_entry_t)); /* * Tree management functions. */ @@ -1848,10 +1848,11 @@ uvm_map_inentry_recheck(u_long serial, v */ boolean_t uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr, -int (*fn)(vm_map_entry_t), u_long serial) +int (*fn)(vm_map_entry_t)) { vm_map_t map = >p_vmspace->vm_map; vm_map_entry_t entry; + u_long serial; int ret; if (addr < map->min_offset || addr >= map->max_offset) @@ -1866,6 +1867,11 @@ uvm_map_inentry_fix(struct proc *p, stru return (FALSE); } + if (ie == >p_spinentry) + serial = map->sserial; + else + serial = map->wserial; + ret = (*fn)(entry); if (ret == 0) { vm_map_unlock_read(map); @@ -1889,16 +1895,16 @@ uvm_map_inentry(struct proc *p, struct p boolean_t ok = TRUE; if (uvm_map_inentry_recheck(serial, addr, ie)) { - KERNEL_LOCK(); - ok = uvm_map_inentry_fix(p, ie, addr, fn, serial); + ok = uvm_map_inentry_fix(p, ie, addr, fn); if (!ok) { printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid, addr, ie->ie_start, ie->ie_end); + KERNEL_LOCK(); p->p_p->ps_acflag |= AMAP; sv.sival_ptr = (void *)PROC_PC(p); trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv); + KERNEL_UNLOCK(); } - KERNEL_UNLOCK(); } return (ok); }
Re: bug? in getopt(3) / possible [PATCH]
On Tue, Mar 17, 2020 at 02:14:58PM +0100, Martijn van Duren wrote: > Included millert, since he imported this code. > He might shed some more light on what's intended. > > On 3/15/20 11:27 PM, 0xef967...@gmail.com wrote: > > On Sun, Mar 15, 2020 at 07:32:52PM +0100, Martijn van Duren wrote: > >> tl;dr new diff below > > > > Check with: > > > > OPTS=r- ./getopt-test-MvD2 -r- > > getopt-test-MvD2: unknown option -- - > > #U- | > > > > With the unpatched code, my patch, and the original 44BSD, it's: > > > > OPTS=r- ./getopt-test-O -r- > > {./getopt-test-O} <-> | > > > > See proposed fix at the end. > > This is intentional. From getopt_long(3): > > OpenBSD a ‘-’ within the option string matches a ‘-’ (single > dash) on the command line. This functionality is > provided for backward compatibility with programs, > such as su(1), that use ‘-’ as an option flag. This > practice is wrong, and should not be used in any > current development. I don't get your point, sorry. My patch didn't mean to change that. Nor do I think that it should be changed (if that matters). My stupid and unnecessary "(optchar == (int)'-' && oli == options)" check was breaking the case where someone meant to use both the GNU-like optstring-starting-with-dash and the BSD dashes-in-optstring features, by using an optstring like "--xy" or "-x-y". As I mentioned, that was a mistake. For the reference, here is an updated patch, without that check: --- getopt-long.c~ 2020-03-12 02:23:29.028903616 +0200 +++ getopt-long.c 2020-03-15 23:46:07.988119523 +0200 @@ -418,15 +418,7 @@ } if ((optchar = (int)*place++) == (int)':' || - (optchar == (int)'-' && *place != '\0') || (oli = strchr(options, optchar)) == NULL) { - /* -* If the user specified "-" and '-' isn't listed in -* options, return -1 (non-option) as per POSIX. -* Otherwise, it is an unknown option character (or ':'). -*/ - if (optchar == (int)'-' && *place == '\0') - return (-1); if (!*place) ++optind; if (PRINT_ERROR)
Re: macppc kernel and clang
On Mon, 16 Mar 2020 19:13:13 -0600 "Theo de Raadt" wrote: > How are the bootblocks faring? > > And userland? ofwboot with clang works for me. I failed to make bsd.rd (in src/distrib/macppc/ramdisk) with clang, but I don't remember exactly what I did, so I might have been making it the wrong way. My build failed at clang -static -L. -nopie -o instbin instbin.o dd.lo ... /usr/bin/ld: dd.lo(.text+0x14): R_PPC_PLTREL24 reloc against local symbol I was able to make bsd.rd with gcc. "$CC -fno-pie -c something.c; objdump -dlr something.o" shows that gcc -fno-pie uses R_PPC_REL24, but clang -fno-pie uses R_PPC_PLTREL24, for calls to external functions. The symbols would be global until crunchgen(8) hides them by marking them local. R_PPC_REL24 would branch directly to a function, and R_PPC_PLTREL24 would go through the PLT (for a function in a shared library). The compiler can't know whether the function is in a shared lib; the linker must decide. clang -fno-pie on amd64 uses R_X86_64_PLT32, so I guess that clang -fno-pie on powerpc isn't wrong to use R_PPC_PLTREL24. Passing -M to crunchgen(8), as we do on {longsoon,octeon,sgi}, might work around the problem, but I haven't tried it, and I don't know whether this is the only problem. --George
Re: ktrwriteraw & vget
On 17/03/20(Tue) 07:50, Philip Guenther wrote: > On Tue, Mar 17, 2020 at 5:18 AM Martin Pieuchot wrote: > > On 17/03/20(Tue) 04:02, Philip Guenther wrote: > > > On Tue, Mar 17, 2020 at 1:07 AM Martin Pieuchot wrote: > > > [...] > > > > @@ -663,8 +668,6 @@ ktrwriteraw(struct proc *curp, struct vn > > > > LIST_FOREACH(pr, , ps_list) > > > > if (pr->ps_tracevp == vp && pr->ps_tracecred == cred) > > > > ktrcleartrace(pr); > > > > - > > > > - vput(vp); > > > > return (error); > > > > } > > > > > > > > > > This looks unsafe to me: isn't ktrcleartrace() only safe if the caller > > > holds a reference to the vnode? Once ktrcleartrace() clears the > > reference > > > from the current thread's process and it goes on the freelist, can't the > > > vnode vp points to be invalidated and reused? > > > > As long as a process holds a reference to the vnode, via `ps_tracevp', > > it wont be recycle. Only the last call of ktrcleartrace() will release > > the vnode via vrele(9). > > ...and after that last reference is released this code will continue to > walk the allprocess list, comparing a possibly-recycled pointer to > ps_tracevp pointers in the remaining processes. Good thing that vrele(9) > is guaranteed to never sleep and thereby let another process start > ktracing, reusing that vnode.?? Are you saying that `allprocess' should only be walked if the current thread is still holding a valid reference to the vnode? Index: kern/kern_ktrace.c === RCS file: /cvs/src/sys/kern/kern_ktrace.c,v retrieving revision 1.100 diff -u -p -r1.100 kern_ktrace.c --- kern/kern_ktrace.c 6 Oct 2019 16:24:14 - 1.100 +++ kern/kern_ktrace.c 17 Mar 2020 17:14:46 - @@ -649,22 +649,29 @@ ktrwriteraw(struct proc *curp, struct vn auio.uio_iovcnt++; auio.uio_resid += kth->ktr_len; } - vget(vp, LK_EXCLUSIVE | LK_RETRY); + error = vget(vp, LK_EXCLUSIVE | LK_RETRY); + if (error) + goto bad; error = VOP_WRITE(vp, , IO_UNIT|IO_APPEND, cred); - if (!error) { - vput(vp); - return (0); - } + vput(vp); + if (error) + goto bad; + + return (0); + +bad: /* * If error encountered, give up tracing on this vnode. */ log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", error); - LIST_FOREACH(pr, , ps_list) + LIST_FOREACH(pr, , ps_list) { + if (pr == curp->p_p) + continue; if (pr->ps_tracevp == vp && pr->ps_tracecred == cred) ktrcleartrace(pr); - - vput(vp); + } + ktrcleartrace(curp->p_p); return (error); }
Expose a few more symbols in arm64 kernel
While playing with dt(4) on arm64, I noticed that there were some unrecognized functions. Diff below fixes that. ok? Index: arch/arm64/arm64/exception.S === RCS file: /cvs/src/sys/arch/arm64/arm64/exception.S,v retrieving revision 1.10 diff -u -p -r1.10 exception.S --- arch/arm64/arm64/exception.S17 Dec 2019 22:25:56 - 1.10 +++ arch/arm64/arm64/exception.S17 Mar 2020 17:06:06 - @@ -155,6 +155,8 @@ 1: .endm + .globl handle_el1h_sync + .type handle_el1h_sync,@function handle_el1h_sync: save_registers 1 mov x0, sp @@ -164,6 +166,8 @@ handle_el1h_sync: dsb nsh isb + .globl handle_el1h_irq + .type handle_el1h_irq,@function handle_el1h_irq: save_registers 1 mov x0, sp @@ -173,6 +177,8 @@ handle_el1h_irq: dsb nsh isb + .globl handle_el1h_error + .type handle_el1h_error,@function handle_el1h_error: brk 0xf13
Re: ldomctl: list-io: print names
> Date: Tue, 17 Mar 2020 17:27:21 +0100 > From: Klemens Nanni > > On Tue, Mar 17, 2020 at 03:26:56PM +0100, Mark Kettenis wrote: > > > @@ -2889,7 +2896,8 @@ list_components(void) > > > > > > pri_init(pri); > > > > > > + printf("PATH\t\tNAME\n"); > > > > Using tabs to make things line up isn't going to work very well. So > > better use field widths in the printf calls. > Sure thing, assignable paths should not be longer than 15 characters, so > I went with even 16 as column width. > > OK? ok kettenis@ > Index: config.c > === > RCS file: /cvs/src/usr.sbin/ldomctl/config.c,v > retrieving revision 1.35 > diff -u -p -r1.35 config.c > --- config.c 7 Mar 2020 18:51:06 - 1.35 > +++ config.c 17 Mar 2020 16:00:29 - > @@ -48,6 +48,7 @@ TAILQ_HEAD(, core) cores; > > struct component { > const char *path; > + const char *nac; > int assigned; > > struct md_node *hv_node; > @@ -216,6 +217,7 @@ pri_init_components(struct md *md) > struct component *component; > struct md_node *node; > const char *path; > + const char *nac; > const char *type; > > TAILQ_INIT(); > @@ -228,6 +230,10 @@ pri_init_components(struct md *md) > if (md_get_prop_str(md, node, "assignable-path", )) { > component = xzalloc(sizeof(*component)); > component->path = path; > + if (md_get_prop_str(md, node, "nac", )) > + component->nac = nac; > + else > + component->nac = "-"; > TAILQ_INSERT_TAIL(, component, link); > } > > @@ -2889,7 +2896,8 @@ list_components(void) > > pri_init_components(pri); > > + printf("%-16s %s\n", "PATH", "NAME"); > TAILQ_FOREACH(component, , link) { > - printf("%s\n", component->path); > + printf("%-16s %s\n", component->path, component->nac); > } > } >
Re: ktrwriteraw & vget
On Tue, Mar 17, 2020 at 5:18 AM Martin Pieuchot wrote: > On 17/03/20(Tue) 04:02, Philip Guenther wrote: > > On Tue, Mar 17, 2020 at 1:07 AM Martin Pieuchot wrote: > > [...] > > > @@ -663,8 +668,6 @@ ktrwriteraw(struct proc *curp, struct vn > > > LIST_FOREACH(pr, , ps_list) > > > if (pr->ps_tracevp == vp && pr->ps_tracecred == cred) > > > ktrcleartrace(pr); > > > - > > > - vput(vp); > > > return (error); > > > } > > > > > > > This looks unsafe to me: isn't ktrcleartrace() only safe if the caller > > holds a reference to the vnode? Once ktrcleartrace() clears the > reference > > from the current thread's process and it goes on the freelist, can't the > > vnode vp points to be invalidated and reused? > > As long as a process holds a reference to the vnode, via `ps_tracevp', > it wont be recycle. Only the last call of ktrcleartrace() will release > the vnode via vrele(9). > ...and after that last reference is released this code will continue to walk the allprocess list, comparing a possibly-recycled pointer to ps_tracevp pointers in the remaining processes. Good thing that vrele(9) is guaranteed to never sleep and thereby let another process start ktracing, reusing that vnode.??
Re: ldomctl: list-io: print names
On Tue, Mar 17, 2020 at 03:26:56PM +0100, Mark Kettenis wrote: > > @@ -2889,7 +2896,8 @@ list_components(void) > > > > pri_init(pri); > > > > + printf("PATH\t\tNAME\n"); > > Using tabs to make things line up isn't going to work very well. So > better use field widths in the printf calls. Sure thing, assignable paths should not be longer than 15 characters, so I went with even 16 as column width. OK? Index: config.c === RCS file: /cvs/src/usr.sbin/ldomctl/config.c,v retrieving revision 1.35 diff -u -p -r1.35 config.c --- config.c7 Mar 2020 18:51:06 - 1.35 +++ config.c17 Mar 2020 16:00:29 - @@ -48,6 +48,7 @@ TAILQ_HEAD(, core) cores; struct component { const char *path; + const char *nac; int assigned; struct md_node *hv_node; @@ -216,6 +217,7 @@ pri_init_components(struct md *md) struct component *component; struct md_node *node; const char *path; + const char *nac; const char *type; TAILQ_INIT(); @@ -228,6 +230,10 @@ pri_init_components(struct md *md) if (md_get_prop_str(md, node, "assignable-path", )) { component = xzalloc(sizeof(*component)); component->path = path; + if (md_get_prop_str(md, node, "nac", )) + component->nac = nac; + else + component->nac = "-"; TAILQ_INSERT_TAIL(, component, link); } @@ -2889,7 +2896,8 @@ list_components(void) pri_init_components(pri); + printf("%-16s %s\n", "PATH", "NAME"); TAILQ_FOREACH(component, , link) { - printf("%s\n", component->path); + printf("%-16s %s\n", component->path, component->nac); } }
Re: ldomctl: list-io: print names
> Date: Tue, 17 Mar 2020 14:12:14 +0100 > From: Klemens Nanni > Content-Type: text/plain; charset=us-ascii > Content-Disposition: inline > > On Sat, Mar 07, 2020 at 05:44:12PM +0100, Klemens Nanni wrote: > > Next to the IO device path to be used in ldom.conf we can also print the > > name which actually tells in human readable form what device we're > > looking at. > > > > Those names are printed in similar format by Solaris `ldm list-io' and > > directly match the the structure seen in the iLOM shell. > > > > $ doas ldomctl list-io > > PATHNAME > > /@400/@2/@0/@8 /SYS/MB/PCIE0 > > /@500/@2/@0/@a /SYS/MB/PCIE1 > > /@400/@2/@0/@4 /SYS/MB/PCIE2 > > /@500/@2/@0/@6 /SYS/MB/PCIE3 > > /@400/@2/@0/@0 /SYS/MB/PCIE4 > > /@500/@2/@0/@0 /SYS/MB/PCIE5 > > /@400/@1/@0/@8 /SYS/MB/PCIE6 > > /@500/@1/@0/@6 /SYS/MB/PCIE7 > > /@400/@1/@0/@c /SYS/MB/PCIE8 > > /@500/@1/@0/@0 /SYS/MB/PCIE9 > > /@400/@2/@0/@e /SYS/MB/SASHBA > > /@400/@1/@0/@4 /SYS/MB/NET0 > > /@500/@1/@0/@5 /SYS/MB/NET2 > > > > OK? > Anyone? I'll probably just commit in a few days unless someone objects, > printing device names is a nice aid in partitioning machines, especially > since picking the wrong iodevice needs power cycling and might even > prevent the primary domain from booting since another guest domain > accidentially got its root disk device. > > PS: with the new mdprint port on ports@ one can easily inspect the PRI > and verify: > > $ mdprint -pqng component.assignable-path,component.nac ./pri | > > grep assignable > component|assignable-path="/@400/@2/@0/@8"|nac="/SYS/MB/PCIE0" > component|assignable-path="/@500/@2/@0/@a"|nac="/SYS/MB/PCIE1" > component|assignable-path="/@400/@2/@0/@4"|nac="/SYS/MB/PCIE2" > component|assignable-path="/@500/@2/@0/@6"|nac="/SYS/MB/PCIE3" > component|assignable-path="/@400/@2/@0/@0"|nac="/SYS/MB/PCIE4" > component|assignable-path="/@500/@2/@0/@0"|nac="/SYS/MB/PCIE5" > component|assignable-path="/@400/@1/@0/@8"|nac="/SYS/MB/PCIE6" > component|assignable-path="/@500/@1/@0/@6"|nac="/SYS/MB/PCIE7" > component|assignable-path="/@400/@1/@0/@c"|nac="/SYS/MB/PCIE8" > component|assignable-path="/@500/@1/@0/@0"|nac="/SYS/MB/PCIE9" > component|assignable-path="/@400/@2/@0/@e"|nac="/SYS/MB/SASHBA" > component|assignable-path="/@400/@1/@0/@4"|nac="/SYS/MB/NET0" > component|assignable-path="/@500/@1/@0/@5"|nac="/SYS/MB/NET2" > > That's how ldomctl works around iodevices: They're "component" nodes in > the tree that have an "assignable-path" attribute; ldomctl picks those, > assigns whatever is specified in ldom.conf to guest domains, and assigns > all remaining components to the primary domain. Looks like I missed this. I like this. However: > Index: config.c > === > RCS file: /cvs/src/usr.sbin/ldomctl/config.c,v > retrieving revision 1.34 > diff -u -p -r1.34 config.c > --- config.c 21 Feb 2020 19:39:28 - 1.34 > +++ config.c 7 Mar 2020 16:39:25 - > @@ -48,6 +48,7 @@ TAILQ_HEAD(, core) cores; > > struct component { > const char *path; > + const char *nac; > int assigned; > > struct md_node *hv_node; > @@ -216,6 +217,7 @@ pri_init_components(struct md *md) > struct component *component; > struct md_node *node; > const char *path; > + const char *nac; > const char *type; > > TAILQ_INIT(); > @@ -228,6 +230,10 @@ pri_init_components(struct md *md) > if (md_get_prop_str(md, node, "assignable-path", )) { > component = xzalloc(sizeof(*component)); > component->path = path; > + if (md_get_prop_str(md, node, "nac", )) > + component->nac = nac; > + else > + component->nac = "-"; > TAILQ_INSERT_TAIL(, component, link); > } > > @@ -2889,7 +2896,8 @@ list_components(void) > > pri_init(pri); > > + printf("PATH\t\tNAME\n"); Using tabs to make things line up isn't going to work very well. So better use field widths in the printf calls. > TAILQ_FOREACH(component, , link) { > - printf("%s\n", component->path); > + printf("%s\t%s\n", component->path, component->nac); > } > } >
Re: ktrwriteraw & vget
On 17/03/20(Tue) 04:02, Philip Guenther wrote: > On Tue, Mar 17, 2020 at 1:07 AM Martin Pieuchot wrote: > [...] > > @@ -663,8 +668,6 @@ ktrwriteraw(struct proc *curp, struct vn > > LIST_FOREACH(pr, , ps_list) > > if (pr->ps_tracevp == vp && pr->ps_tracecred == cred) > > ktrcleartrace(pr); > > - > > - vput(vp); > > return (error); > > } > > > > This looks unsafe to me: isn't ktrcleartrace() only safe if the caller > holds a reference to the vnode? Once ktrcleartrace() clears the reference > from the current thread's process and it goes on the freelist, can't the > vnode vp points to be invalidated and reused? As long as a process holds a reference to the vnode, via `ps_tracevp', it wont be recycle. Only the last call of ktrcleartrace() will release the vnode via vrele(9).
Re: bug? in getopt(3) / possible [PATCH]
Included millert, since he imported this code. He might shed some more light on what's intended. On 3/15/20 11:27 PM, 0xef967...@gmail.com wrote: > On Sun, Mar 15, 2020 at 07:32:52PM +0100, Martijn van Duren wrote: >> tl;dr new diff below > > Check with: > > OPTS=r- ./getopt-test-MvD2 -r- > getopt-test-MvD2: unknown option -- - > #U- | > > With the unpatched code, my patch, and the original 44BSD, it's: > > OPTS=r- ./getopt-test-O -r- > {./getopt-test-O} <-> | > > See proposed fix at the end. This is intentional. From getopt_long(3): OpenBSD a ‘-’ within the option string matches a ‘-’ (single dash) on the command line. This functionality is provided for backward compatibility with programs, such as su(1), that use ‘-’ as an option flag. This practice is wrong, and should not be used in any current development. The original might have accepted this, but if the getopt_long(3) manpage is rather specific about this and the current code goes out of its way to make sure that it only appears at the end of the string (-q-q doesn't work). So why go out of your way to disallow that behaviour, but not catch the case you describe here (and is not documented as such). If we do care about this particular feature then why add the additional checks at all? Why not let all the code prior to this section handle all the common "-" cases and let the rest utilize "-" as any normal getopt character, including an optstring with "-:" which would allow -q- and "- ". My reasoning is: Let's restrict this misfeature as much as possible and only use it for where it's needed (su(1)). But let's see what millert or any of the other more experienced devs have to say. > >>optreset and flags. Note that on line 317 we increment the pointer on >>options. This means that the "oli == options" check in your code is >>not needed and even introduces a bug when someone would be stupid >>enough to create an optstring which starts with "--", where a "-" >>option stop to being recognized: > > Yes, that check was dumb, it should be removed. > >> On 3/15/20 11:21 AM, 0xef967...@gmail.com wrote: >>> Notice that OpenBSD's getopt(3) explicitly allows to use "-" as an >>> option character, and there may be a /separate/ bug related to that, >>> but my patch doesn't change or affect it in any way. >> >> This is in line with getopt(3) and getopt_long(3) expands on that >> saying it's (at least) for su(1). So if we want to remove this >> (mis)feature a lot more scrutiny needs to be done and I'm not feeling >> like going down that rab(bit|ies)-hole any time soon. > > I didn't say that that feature should be removed, but that there may > be another bug/incompat related to that. Since it was there since 44BSD, > it should be left as is (IMVHO). > >> I did some pointer dumping inside your test-application and inside >> getopt_internal and found that the OPTS environment variable is 1 byte >> from the last place pointer. When you overwrite every byte of the >> original avs with ':' (including the terminating NUL) you expand the >> string length of the place variable inside getopt_internal and it starts >> to parse the "garbage" you see in your example. > > That was of course intentional. It's perfectly legit to overwrite > the args strings (including their terminating \0). And it makes for > more dramatic testcases ;-) > >> Since our manpage explicitly states that you need to set optreset I >> don't know if it's worth adding a "fix" for, if such a thing is at > > No need to add another fix for it; that return (-1) that my (and your > current patch) eliminates is the only path through which -1 can be > returned /without/ setting "place = EMSG". > >> if ((optchar = (int)*place++) == (int)':' || >> -(optchar == (int)'-' && *place != '\0') || >> -(oli = strchr(options, optchar)) == NULL) { >> -/* >> - * If the user specified "-" and '-' isn't listed in >> - * options, return -1 (non-option) as per POSIX. >> - * Otherwise, it is an unknown option character (or ':'). >> - */ >> -if (optchar == (int)'-' && *place == '\0') >> -return (-1); >> +(oli = strchr(options, optchar)) == NULL || >> +(optchar == '-' && >> +(oli == NULL || strcmp(nargv[optind], "-") != 0))) { >> if (!*place) >> ++optind; >> if (PRINT_ERROR) > > ... || (foo = bar) == NULL || (baz && (foo == NULL || quux)) > > is the same as > > ... || (foo = bar) == NULL || (baz && quux) > > since the "||" is shortcutting. > > But I still don't see how "quux" (strcmp(nargv[optind], "-") != 0) > may happen with a args which is just "-" (since it's already caught by > the "If we have "-" do nothing" check above it). > > It will however BREAK a case like that mentioned at the beginning > of
Re: ldomctl: list-io: print names
On Sat, Mar 07, 2020 at 05:44:12PM +0100, Klemens Nanni wrote: > Next to the IO device path to be used in ldom.conf we can also print the > name which actually tells in human readable form what device we're > looking at. > > Those names are printed in similar format by Solaris `ldm list-io' and > directly match the the structure seen in the iLOM shell. > > $ doas ldomctl list-io > PATHNAME > /@400/@2/@0/@8 /SYS/MB/PCIE0 > /@500/@2/@0/@a /SYS/MB/PCIE1 > /@400/@2/@0/@4 /SYS/MB/PCIE2 > /@500/@2/@0/@6 /SYS/MB/PCIE3 > /@400/@2/@0/@0 /SYS/MB/PCIE4 > /@500/@2/@0/@0 /SYS/MB/PCIE5 > /@400/@1/@0/@8 /SYS/MB/PCIE6 > /@500/@1/@0/@6 /SYS/MB/PCIE7 > /@400/@1/@0/@c /SYS/MB/PCIE8 > /@500/@1/@0/@0 /SYS/MB/PCIE9 > /@400/@2/@0/@e /SYS/MB/SASHBA > /@400/@1/@0/@4 /SYS/MB/NET0 > /@500/@1/@0/@5 /SYS/MB/NET2 > > OK? Anyone? I'll probably just commit in a few days unless someone objects, printing device names is a nice aid in partitioning machines, especially since picking the wrong iodevice needs power cycling and might even prevent the primary domain from booting since another guest domain accidentially got its root disk device. PS: with the new mdprint port on ports@ one can easily inspect the PRI and verify: $ mdprint -pqng component.assignable-path,component.nac ./pri | > grep assignable component|assignable-path="/@400/@2/@0/@8"|nac="/SYS/MB/PCIE0" component|assignable-path="/@500/@2/@0/@a"|nac="/SYS/MB/PCIE1" component|assignable-path="/@400/@2/@0/@4"|nac="/SYS/MB/PCIE2" component|assignable-path="/@500/@2/@0/@6"|nac="/SYS/MB/PCIE3" component|assignable-path="/@400/@2/@0/@0"|nac="/SYS/MB/PCIE4" component|assignable-path="/@500/@2/@0/@0"|nac="/SYS/MB/PCIE5" component|assignable-path="/@400/@1/@0/@8"|nac="/SYS/MB/PCIE6" component|assignable-path="/@500/@1/@0/@6"|nac="/SYS/MB/PCIE7" component|assignable-path="/@400/@1/@0/@c"|nac="/SYS/MB/PCIE8" component|assignable-path="/@500/@1/@0/@0"|nac="/SYS/MB/PCIE9" component|assignable-path="/@400/@2/@0/@e"|nac="/SYS/MB/SASHBA" component|assignable-path="/@400/@1/@0/@4"|nac="/SYS/MB/NET0" component|assignable-path="/@500/@1/@0/@5"|nac="/SYS/MB/NET2" That's how ldomctl works around iodevices: They're "component" nodes in the tree that have an "assignable-path" attribute; ldomctl picks those, assigns whatever is specified in ldom.conf to guest domains, and assigns all remaining components to the primary domain. Index: config.c === RCS file: /cvs/src/usr.sbin/ldomctl/config.c,v retrieving revision 1.34 diff -u -p -r1.34 config.c --- config.c21 Feb 2020 19:39:28 - 1.34 +++ config.c7 Mar 2020 16:39:25 - @@ -48,6 +48,7 @@ TAILQ_HEAD(, core) cores; struct component { const char *path; + const char *nac; int assigned; struct md_node *hv_node; @@ -216,6 +217,7 @@ pri_init_components(struct md *md) struct component *component; struct md_node *node; const char *path; + const char *nac; const char *type; TAILQ_INIT(); @@ -228,6 +230,10 @@ pri_init_components(struct md *md) if (md_get_prop_str(md, node, "assignable-path", )) { component = xzalloc(sizeof(*component)); component->path = path; + if (md_get_prop_str(md, node, "nac", )) + component->nac = nac; + else + component->nac = "-"; TAILQ_INSERT_TAIL(, component, link); } @@ -2889,7 +2896,8 @@ list_components(void) pri_init(pri); + printf("PATH\t\tNAME\n"); TAILQ_FOREACH(component, , link) { - printf("%s\n", component->path); + printf("%s\t%s\n", component->path, component->nac); } }
Re: ktrwriteraw & vget
On Tue, Mar 17, 2020 at 1:07 AM Martin Pieuchot wrote: > On 16/03/20(Mon) 14:01, Martin Pieuchot wrote: > > vget(9) might fail, stop right away if that happens. > > > > CID 1453020 Unchecked return value. > > Updated diff that stops tracing if vget(9) fails, similar to what's > currently done if VOP_WRITE(9) fails, suggested by visa@. > > Code shuffling is there to avoid calling vput(9) if vget(9) doesn't > succeed. > > ok? > > Index: kern/kern_ktrace.c > === > RCS file: /cvs/src/sys/kern/kern_ktrace.c,v > retrieving revision 1.100 > diff -u -p -r1.100 kern_ktrace.c > --- kern/kern_ktrace.c 6 Oct 2019 16:24:14 - 1.100 > +++ kern/kern_ktrace.c 17 Mar 2020 09:46:03 - > @@ -649,12 +649,17 @@ ktrwriteraw(struct proc *curp, struct vn > auio.uio_iovcnt++; > auio.uio_resid += kth->ktr_len; > } > - vget(vp, LK_EXCLUSIVE | LK_RETRY); > + error = vget(vp, LK_EXCLUSIVE | LK_RETRY); > + if (error) > + goto bad; > error = VOP_WRITE(vp, , IO_UNIT|IO_APPEND, cred); > - if (!error) { > - vput(vp); > - return (0); > - } > + vput(vp); > + if (error) > + goto bad; > + > + return (0); > + > +bad: > /* > * If error encountered, give up tracing on this vnode. > */ > @@ -663,8 +668,6 @@ ktrwriteraw(struct proc *curp, struct vn > LIST_FOREACH(pr, , ps_list) > if (pr->ps_tracevp == vp && pr->ps_tracecred == cred) > ktrcleartrace(pr); > - > - vput(vp); > return (error); > } > This looks unsafe to me: isn't ktrcleartrace() only safe if the caller holds a reference to the vnode? Once ktrcleartrace() clears the reference from the current thread's process and it goes on the freelist, can't the vnode vp points to be invalidated and reused? Maybe this vget() should be split into vref() + vn_lock(), and the vput() similarly split into VOP_UNLOCK() + vrele(), so the vrele() can be left after this LIST_FOREACH() while the VOP_UNLOCK() is hoisted to the "vn_lock() succeeded" path. Philip Guenther
Please test: ix(4)+em(4) MSI-X towards multiqueues
Diff below allows ix(4) and em(4) to establish two MSI-X handlers. Multiqueue support is intentionally not included in this diff. One handler is for the single queue (index ":0") and one for the link interrupt: # vmstat -i |grep ix0 irq114/ix0:0 73178178 3758 irq115/ix0 20 A per-driver toggle allows to switch MSI-X support on/off. My plan is to commit with the switch "off" for the moment. Switching it to "on" should be better done in the beginning of a release cycle. However it is "on" in the diff below for testing purposes. The em(4) diff doesn't include support for 82575 nor 82574. If somebody has such hardware and is interested I can lift the necessary bits from FreeBSD. That said em_setup_queues_msix() contains references to 82575 to help diffing with FreeBSD. The ix(4) diff cannot be committed right now as it breaks the build on architectures that define pci_intr_map_msix() to (-1). The compiler complains that the arguments of the macro are unused. /sys/dev/pci/if_ix.c:1789:21: error: unused variable 'dummy' [-Werror,-Wunused-variable] pci_intr_handle_tdummy; ^ /sys/dev/pci/if_ix.c:1788:26: error: unused variable 'pa' [-Werror,-Wunused-variable] struct pci_attach_args *pa = >os_pa; So I'd like to know if that's the way to test if MSI-X can be used: /* * Try a dummy map, maybe this bus doesn't like MSI, this function * has no side effects. */ if (pci_intr_map_msix(pa, 0, )) return (0); I'd appreciate tests on many hardware as possible, especially for em(4). If you have been testing an earlier version of these diffs, please do it again :) Thanks, Martin Index: dev/pci/if_em.c === RCS file: /cvs/src/sys/dev/pci/if_em.c,v retrieving revision 1.347 diff -u -p -r1.347 if_em.c --- dev/pci/if_em.c 8 Mar 2020 11:43:43 - 1.347 +++ dev/pci/if_em.c 17 Mar 2020 11:18:01 - @@ -233,6 +233,7 @@ void em_defer_attach(struct device*); int em_detach(struct device *, int); int em_activate(struct device *, int); int em_intr(void *); +int em_allocate_legacy(struct em_softc *); void em_start(struct ifqueue *); int em_ioctl(struct ifnet *, u_long, caddr_t); void em_watchdog(struct ifnet *); @@ -290,6 +291,13 @@ void em_flush_tx_ring(struct em_queue *) void em_flush_rx_ring(struct em_queue *); void em_flush_desc_rings(struct em_softc *); +/* MSIX/Multiqueue functions */ +int em_allocate_msix(struct em_softc *); +int em_setup_queues_msix(struct em_softc *); +int em_queue_intr_msix(void *); +int em_link_intr_msix(void *); +void em_enable_queue_intr_msix(struct em_queue *); + /* * OpenBSD Device Interface Entry Points */ @@ -304,6 +312,7 @@ struct cfdriver em_cd = { }; static int em_smart_pwr_down = FALSE; +int em_enable_msix = 1; /* * Device identification routine @@ -919,6 +928,14 @@ em_init(void *arg) } em_initialize_receive_unit(sc); + if (sc->msix) { + if (em_setup_queues_msix(sc)) { + printf("%s: Can't setup msix queues\n", DEVNAME(sc)); + splx(s); + return; + } + } + /* Program promiscuous mode and multicast filters. */ em_iff(sc); @@ -1617,10 +1634,7 @@ int em_allocate_pci_resources(struct em_softc *sc) { int val, rid; - pci_intr_handle_t ih; - const char *intrstr = NULL; struct pci_attach_args *pa = >osdep.em_pa; - pci_chipset_tag_t pc = pa->pa_pc; struct em_queue*que = NULL; val = pci_conf_read(pa->pa_pc, pa->pa_tag, EM_MMBA); @@ -1691,6 +1705,9 @@ em_allocate_pci_resources(struct em_soft } } + sc->osdep.dev = (struct device *)sc; + sc->hw.back = >osdep; + /* Only one queue for the moment. */ que = malloc(sizeof(struct em_queue), M_DEVBUF, M_NOWAIT | M_ZERO); if (que == NULL) { @@ -1703,29 +1720,10 @@ em_allocate_pci_resources(struct em_soft sc->queues = que; sc->num_queues = 1; + sc->msix = 0; sc->legacy_irq = 0; - if (pci_intr_map_msi(pa, )) { - if (pci_intr_map(pa, )) { - printf(": couldn't map interrupt\n"); - return (ENXIO); - } - sc->legacy_irq = 1; - } - - sc->osdep.dev = (struct device *)sc; - sc->hw.back = >osdep; - - intrstr = pci_intr_string(pc, ih); - sc->sc_intrhand = pci_intr_establish(pc, ih, IPL_NET | IPL_MPSAFE, -
Re: macppc kernel and clang
> From: "Theo de Raadt" > Date: Mon, 16 Mar 2020 19:13:13 -0600 > > ok deraadt > > Next up, to figure out the right plan for ofw. > > Thank you so much for figuring out these two details. > > How are the bootblocks faring? I believe somebody already tested the bootloader. Doesn't hurt to try again though. > And userland? base was in pretty good shape already and I'm rebuilding now that the ABI fix is in. Haven't tried xenocara yet. > George Koehler wrote: > > > On Mon, 16 Mar 2020 12:54:52 +0100 (CET) > > Mark Kettenis wrote: > > > > > I had a look at what NetBSD does, and they use '%L0' instead of > > > '%0+1'. As far as I can tell this works on both gcc and clang. The > > > diff below produces a working kernel when building with gcc. Not yet > > > in a position to test a clang-built kernel myself yet. But if this > > > produces a working kernel with clang as well, I'd prefer this diff > > > instead of yours. > > > > Yes, the clang kernel is working with your %L0 diff and the noinline > > stuff. I now prefer your %L0 diff, ok gkoehler@ > > > > "mftb %L0" becomes "mftb ${0:L}" in LLVM IR (clang -S -emit-llvm), > > then LLVM handles the 'L' in PPCAsmPrinter::PrintAsmOperand in > > /usr/src/gnu/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp > > > > > Index: arch/powerpc/include/cpu.h > > > === > > > RCS file: /cvs/src/sys/arch/powerpc/include/cpu.h,v > > > retrieving revision 1.65 > > > diff -u -p -r1.65 cpu.h > > > --- arch/powerpc/include/cpu.h23 Mar 2019 05:27:53 - 1.65 > > > +++ arch/powerpc/include/cpu.h16 Mar 2020 11:30:42 - > > > @@ -336,7 +336,7 @@ ppc_mftb(void) > > > u_long scratch; > > > u_int64_t tb; > > > > > > - __asm volatile ("1: mftbu %0; mftb %0+1; mftbu %1;" > > > + __asm volatile ("1: mftbu %0; mftb %L0; mftbu %1;" > > > " cmpw 0,%0,%1; bne 1b" : "=r"(tb), "=r"(scratch)); > > > return tb; > > > } > > > > > > -- > > George Koehler > > >
Re: ktrwriteraw & vget
On 16/03/20(Mon) 14:01, Martin Pieuchot wrote: > vget(9) might fail, stop right away if that happens. > > CID 1453020 Unchecked return value. Updated diff that stops tracing if vget(9) fails, similar to what's currently done if VOP_WRITE(9) fails, suggested by visa@. Code shuffling is there to avoid calling vput(9) if vget(9) doesn't succeed. ok? Index: kern/kern_ktrace.c === RCS file: /cvs/src/sys/kern/kern_ktrace.c,v retrieving revision 1.100 diff -u -p -r1.100 kern_ktrace.c --- kern/kern_ktrace.c 6 Oct 2019 16:24:14 - 1.100 +++ kern/kern_ktrace.c 17 Mar 2020 09:46:03 - @@ -649,12 +649,17 @@ ktrwriteraw(struct proc *curp, struct vn auio.uio_iovcnt++; auio.uio_resid += kth->ktr_len; } - vget(vp, LK_EXCLUSIVE | LK_RETRY); + error = vget(vp, LK_EXCLUSIVE | LK_RETRY); + if (error) + goto bad; error = VOP_WRITE(vp, , IO_UNIT|IO_APPEND, cred); - if (!error) { - vput(vp); - return (0); - } + vput(vp); + if (error) + goto bad; + + return (0); + +bad: /* * If error encountered, give up tracing on this vnode. */ @@ -663,8 +668,6 @@ ktrwriteraw(struct proc *curp, struct vn LIST_FOREACH(pr, , ps_list) if (pr->ps_tracevp == vp && pr->ps_tracecred == cred) ktrcleartrace(pr); - - vput(vp); return (error); }
Re: unbound 1.10.0
On 3/16/20 10:54 PM, Jordan Geoghegan wrote: That's good to hear, I've had a number of issues with DoT stalling on 1.9.4 and 1.9.6. I've found having multiple entries in my forwarders section has somewhat mitigated the issue. I wonder if the stalling problem is related to this: https://github.com/NLnetLabs/unbound/issues/47 I was talking about DoT servers, I haven't really noticed any big issue with DoT forwarding. That said, I had 2 stalls over the night, so unbound 1.10.0 still doesn't solve the issue. Besides, I noticed that the android app "nebulo" wasn't happy with either 1.9.6 or 1.10.0, but works fine with dnsdist which I am now trying following sthen@ advise. smime.p7s Description: S/MIME Cryptographic Signature