On Sat, Dec 01, 2018 at 04:34:57PM +0100, Anton Lindqvist wrote:
> On Tue, Nov 27, 2018 at 05:52:15PM -0800, Greg Steuck wrote:
> > I booted the patched kernel and it seems to have gone farther and I believe
> > reached init before crashing.
> 
> By performing a semi-automated bisect I was able to identify the source
> files that are incompatible with tracing. Common for all source files
> seems to be that they define routines called early on in the boot
> process before curcpu() is usable.
> 
> I do not have any plans on committing the diff below but please give it
> a try. Instead, I'm working on extending the files.conf(5) grammar in
> order to infer a different make target for the files.
> 
> Index: arch/amd64/conf/Makefile.amd64
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/conf/Makefile.amd64,v
> retrieving revision 1.106
> diff -u -p -r1.106 Makefile.amd64
> --- arch/amd64/conf/Makefile.amd64    30 Oct 2018 11:08:30 -0000      1.106
> +++ arch/amd64/conf/Makefile.amd64    1 Dec 2018 15:32:58 -0000
> @@ -151,7 +151,31 @@ vers.o: ${SYSTEM_DEP:Ngap.o}
>       ${CC} ${CFLAGS} ${CPPFLAGS} ${PROF} -c vers.c
>  
>  .if ${SYSTEM_OBJ:Mkcov.o} && ${COMPILER_VERSION:Mclang}
> +amd64_mem.o: $S/arch/amd64/amd64/amd64_mem.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +cpu.o: $S/arch/amd64/amd64/cpu.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +fpu.o: $S/arch/amd64/amd64/fpu.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +gdt.o: $S/arch/amd64/amd64/gdt.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +intr.o: $S/arch/amd64/amd64/intr.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +lapic.o: $S/arch/amd64/amd64/lapic.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +machdep.o: $S/arch/amd64/amd64/machdep.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +tsc.o: $S/arch/amd64/amd64/tsc.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
>  kcov.o: $S/dev/kcov.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +pvbus.o: $S/dev/pv/pvbus.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +kern_lock.o: $S/kern/kern_lock.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +kern_sched.o: $S/kern/kern_sched.c
> +     ${NORMAL_C} -fno-sanitize-coverage=trace-pc
> +kern_tc.o: $S/kern/kern_tc.c
>       ${NORMAL_C} -fno-sanitize-coverage=trace-pc
>  .endif
>  

Here's a new diff taking a different approach. Keeping tracing off until
all secondary CPUs have booted solves the issue of accessing curcpu()
too early. Another issue was then discovered, curproc can be NULL before
the idle thread tied the current CPU has started. Currently running with
this diff applied on my laptop (MP) and positive results from Greg. The
diff will be further exercised in the actual syzkaller setup before
committing.

Comments? OK?

diff --git sys/dev/kcov.c sys/dev/kcov.c
index 8e36bc8b8ef..c97aae4ed5d 100644
--- sys/dev/kcov.c
+++ sys/dev/kcov.c
@@ -58,6 +58,8 @@ static inline int inintr(void);
 
 TAILQ_HEAD(, kcov_dev) kd_list = TAILQ_HEAD_INITIALIZER(kd_list);
 
+int kcov_cold = 1;
+
 #ifdef KCOV_DEBUG
 int kcov_debug = 1;
 #endif
@@ -76,19 +78,31 @@ int kcov_debug = 1;
 void
 __sanitizer_cov_trace_pc(void)
 {
-       extern int cold;
        struct kcov_dev *kd;
+       struct proc *p;
        uint64_t idx;
 
-       /* Do not trace during boot. */
-       if (cold)
+       /*
+        * Do not trace before all secondary CPUs have booted.
+        * Accessing the current CPU during boot causes a subtle crash since its
+        * GSBASE register has not yet been written.
+        */
+       if (kcov_cold)
                return;
 
        /* Do not trace in interrupts to prevent noisy coverage. */
        if (inintr())
                return;
 
-       kd = curproc->p_kd;
+       /*
+        * Protect against when the idle thread for the current CPU has not yet
+        * started and curproc is absent.
+        */
+       p = curproc;
+       if (p == NULL)
+               return;
+
+       kd = p->p_kd;
        if (kd == NULL || kd->kd_mode != KCOV_MODE_TRACE_PC)
                return;
 
@@ -226,6 +240,12 @@ kcov_exit(struct proc *p)
        p->p_kd = NULL;
 }
 
+void
+kcov_init(void)
+{
+       kcov_cold = 0;
+}
+
 struct kcov_dev *
 kd_lookup(int unit)
 {
diff --git sys/kern/init_main.c sys/kern/init_main.c
index 91070090bb1..25b71fd89ce 100644
--- sys/kern/init_main.c
+++ sys/kern/init_main.c
@@ -103,6 +103,11 @@ extern void nfs_init(void);
 #include "vscsi.h"
 #include "softraid.h"
 
+#include "kcov.h"
+#if NKCOV > 0
+#include <sys/kcov.h>
+#endif
+
 const char     copyright[] =
 "Copyright (c) 1982, 1986, 1989, 1991, 1993\n"
 "\tThe Regents of the University of California.  All rights reserved.\n"
@@ -555,6 +560,10 @@ main(void *framep)
 
        config_process_deferred_mountroot();
 
+#if NKCOV > 0
+       kcov_init();
+#endif
+
        /*
         * Okay, now we can let init(8) exec!  It's off to userland!
         */
diff --git sys/sys/kcov.h sys/sys/kcov.h
index 752b290e615..3ff32b330e4 100644
--- sys/sys/kcov.h
+++ sys/sys/kcov.h
@@ -30,6 +30,7 @@
 #define KCOV_BUF_MAX_NMEMB     (256 << 10)
 
 void kcov_exit(struct proc *);
+void kcov_init(void);
 
 #endif /* _KERNEL */
 

Reply via email to