Hi Steven, I just noticed that the last patch had a formatting issue;
this patch is the good one.
Thanks.

Previous changes:
PATCH v1: Initial patch
PATCH v2:
   Removed arch-specific code and switched to the default clock.
   Improved code reusability.
   Added the HAVE_EARLY_BOOT_FTRACE config option, which is disabled by default.
PATCH v3:
   Write early boot temporary buffer to a sub-buffer instead of the global one.
   Improve Kconfig help text.

PATCH v4 :
        Some code refactoring.

PATCH v5 :
        Fixed the build failures on i386.

Patch starts here:
----------------------------------------------------------------------

The early boot tracing will start from the beginning of start_kernel()
and will stop at ftrace_init()

start_kernel()
{
  ftrace_early_boot_init() <--- start early boot function tracing
  ...
  (calls)
  ...
  ftrace_init()            <--- stop early boot function tracing
  early_trace_init();
  ...
}

The events are placed in a temporary buffer, which will be copied to
the trace buffer after memory setup.

Dynamic tracing is not implemented with live patching. Instead, the
ftrace_filter and ftrace_notrace parameters select which functions are
traced or not traced; then, in the callback from the mcount hook, a
binary search over the sorted filter lists decides which calls to
save and which to discard.

Signed-off-by: Abderrahmane Benbachir <abderrahmane.benbac...@polymtl.ca>
Cc: Steven Rostedt <rost...@goodmis.org>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Mathieu Desnoyers <mathieu.desnoy...@efficios.com>
Cc: linux-kernel@vger.kernel.org

---
 arch/x86/Kconfig            |   1 +
 arch/x86/kernel/ftrace_32.S |  45 ++++--
 arch/x86/kernel/ftrace_64.S |  14 ++
 include/linux/ftrace.h      |  18 ++-
 init/main.c                 |   1 +
 kernel/trace/Kconfig        |  51 +++++++
 kernel/trace/ftrace.c       | 294 +++++++++++++++++++++++++++++++++++-
 kernel/trace/trace.c        |  41 +++++
 8 files changed, 453 insertions(+), 12 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8689e794a43c..f4f754d4aa7a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -152,6 +152,7 @@ config X86
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
+       select HAVE_EARLY_BOOT_FTRACE
        select HAVE_GCC_PLUGINS
        select HAVE_HW_BREAKPOINT
        select HAVE_IDE
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 4c8440de3355..a247cbf4c529 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -31,12 +31,8 @@ EXPORT_SYMBOL(mcount)
 # define MCOUNT_FRAME                  0       /* using frame = false */
 #endif
 
-ENTRY(function_hook)
-       ret
-END(function_hook)
-
-ENTRY(ftrace_caller)
 
+.macro save_mcount_regs
 #ifdef USING_FRAME_POINTER
 # ifdef CC_USING_FENTRY
        /*
@@ -73,11 +69,9 @@ ENTRY(ftrace_caller)
 
        movl    function_trace_op, %ecx
        subl    $MCOUNT_INSN_SIZE, %eax
+       .endm
 
-.globl ftrace_call
-ftrace_call:
-       call    ftrace_stub
-
+.macro restore_mcount_regs
        addl    $4, %esp                        /* skip NULL pointer */
        popl    %edx
        popl    %ecx
@@ -90,6 +84,39 @@ ftrace_call:
        addl    $4, %esp                        /* skip parent ip */
 # endif
 #endif
+       .endm
+
+ENTRY(function_hook)
+#ifdef CONFIG_EARLY_BOOT_FUNCTION_TRACER
+       cmpl    $__PAGE_OFFSET, %esp
+       jb      early_boot_stub                 /* %esp below __PAGE_OFFSET: paging not enabled yet? then just return */
+
+       cmpl    $ftrace_stub, ftrace_early_boot_trace_function
+       jnz early_boot_trace                    /* pointer differs from ftrace_stub: call it */
+
+early_boot_stub:
+       ret
+
+early_boot_trace:
+       save_mcount_regs
+       call    *ftrace_early_boot_trace_function
+       restore_mcount_regs
+
+       jmp early_boot_stub                     /* leave through the single ret above */
+#else
+       ret                                     /* early boot tracing not configured: plain stub */
+#endif
+END(function_hook)
+
+ENTRY(ftrace_caller)
+       save_mcount_regs
+
+.globl ftrace_call
+ftrace_call:
+       call    ftrace_stub
+
+       restore_mcount_regs
+
 .Lftrace_ret:
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 .globl ftrace_graph_call
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 91b2cff4b79a..81736c6e2f9b 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -151,7 +151,21 @@ EXPORT_SYMBOL(mcount)
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 ENTRY(function_hook)
+# ifdef CONFIG_EARLY_BOOT_FUNCTION_TRACER
+       cmpq $ftrace_stub, ftrace_early_boot_trace_function
+       jnz early_boot_trace            /* pointer differs from ftrace_stub: call it */
+
+early_boot_stub:
         retq
+
+early_boot_trace:
+       save_mcount_regs
+       call *ftrace_early_boot_trace_function
+       restore_mcount_regs
+       jmp early_boot_stub             /* leave through the single retq above */
+# else
+       retq                            /* early boot tracing not configured: plain stub */
+# endif
 ENDPROC(function_hook)
 
 ENTRY(ftrace_caller)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index dd16e8218db3..b48382de1fd4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -239,6 +239,18 @@ static inline void ftrace_free_init_mem(void) { }
 static inline void ftrace_free_mem(struct module *mod, void *start, void *end) 
{ }
 #endif /* CONFIG_FUNCTION_TRACER */
 
+#ifdef CONFIG_EARLY_BOOT_FUNCTION_TRACER
+extern void __init ftrace_early_boot_init(char *command_line);
+extern void __init ftrace_early_boot_shutdown(void);
+extern void __init ftrace_early_boot_fill_ringbuffer(void *data);
+extern inline bool __init is_ftrace_early_boot_activated(void);
+#else
+static inline void __init ftrace_early_boot_init(char *command_line) { }
+static inline void __init ftrace_early_boot_shutdown(void) { }
+static inline void __init ftrace_early_boot_fill_ringbuffer(void *data) { }
+static inline bool __init is_ftrace_early_boot_activated(void) { return false; 
}
+#endif
+
 #ifdef CONFIG_STACK_TRACER
 
 #define STACK_TRACE_ENTRIES 500
@@ -443,6 +455,10 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec);
 
 extern ftrace_func_t ftrace_trace_function;
 
+#if defined(CONFIG_EARLY_BOOT_FUNCTION_TRACER) && 
defined(CONFIG_DYNAMIC_FTRACE)
+extern ftrace_func_t ftrace_early_boot_trace_function;
+#endif
+
 int ftrace_regex_open(struct ftrace_ops *ops, int flag,
                  struct inode *inode, struct file *file);
 ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
@@ -716,7 +732,7 @@ static inline unsigned long get_lock_parent_ip(void)
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
 #else
-static inline void ftrace_init(void) { }
+static inline void ftrace_init(void) { ftrace_early_boot_shutdown(); }
 #endif
 
 /*
diff --git a/init/main.c b/init/main.c
index ee147103ba1b..07b83997b6a7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,6 +539,7 @@ asmlinkage __visible void __init start_kernel(void)
        char *command_line;
        char *after_dashes;
 
+       ftrace_early_boot_init(boot_command_line);
        set_task_stack_end_magic(&init_task);
        smp_setup_processor_id();
        debug_objects_early_init();
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5e3de28c7677..e56bb2d4c395 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -19,6 +19,11 @@ config HAVE_FUNCTION_TRACER
        help
          See Documentation/trace/ftrace-design.rst
 
+config HAVE_EARLY_BOOT_FTRACE
+        bool
+        help
+          See Documentation/trace/ftrace-design.rst
+
 config HAVE_FUNCTION_GRAPH_TRACER
        bool
        help
@@ -155,6 +160,52 @@ config FUNCTION_TRACER
          (the bootup default), then the overhead of the instructions is very
          small and not measurable even in micro-benchmarks.
 
+config EARLY_BOOT_FUNCTION_TRACER
+       bool "Early Boot Kernel Function Tracer"
+       depends on FUNCTION_TRACER
+       depends on HAVE_EARLY_BOOT_FTRACE
+       help
+         Normally, function tracing can only start after memory has been
+         initialized early in boot. If "ftrace=function" is added to the
+         command line, then function tracing will start after memory setup.
+         In order to trace functions before that, this option will
+         have function tracing start before memory setup is complete, by
+         placing the trace in a temporary buffer, which will be copied to
+         the trace buffer after memory setup. The size of this temporary
+         buffer is defined by EARLY_BOOT_FTRACE_BUF_SHIFT.
+
+config EARLY_BOOT_FTRACE_BUF_SHIFT
+       int "Temporary buffer size (17 => 128 KB, 24 => 16 MB)"
+       depends on EARLY_BOOT_FUNCTION_TRACER
+       range 8 24
+       default 19
+       help
+         Select the size of the buffer to be used for storing function calls at
+         early boot stage.
+         The value defines the size as a power of 2 and must be in the
+         range 8 to 24.
+         Examples:
+           20 =>   1 MB
+           19 => 512 KB
+           17 => 128 KB
+
+config EARLY_BOOT_FTRACE_FILTER_SHIFT
+       int "Temporary filter size (filter/notrace) (17 => 128 KB, 19 => 512 
KB)"
+       depends on EARLY_BOOT_FUNCTION_TRACER
+       depends on FTRACE_MCOUNT_RECORD
+       range 0 19
+       default 17
+       help
+         Select the size of the filter buffer to be used for filtering (trace/
+         no trace) functions at early boot stage.
+         Two buffers (trace and no_trace) will be created using this option.
+         These following kernel parameters control filtering during bootup :
+         "ftrace_filter", "ftrace_notrace".
+         The value defines the size as a power of 2 and must be in the
+         range 0 to 19.
+         Examples:
+           19 => 512 KB for each buffer
+           18 => 256 KB for each buffer
+           17 => 128 KB for each buffer
+
 config FUNCTION_GRAPH_TRACER
        bool "Kernel Function Graph Tracer"
        depends on HAVE_FUNCTION_GRAPH_TRACER
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 77734451cb05..6d2c00f08f13 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -65,6 +65,11 @@
 #define FTRACE_HASH_DEFAULT_BITS 10
 #define FTRACE_HASH_MAX_BITS 12
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+extern unsigned long __start_mcount_loc[];
+extern unsigned long __stop_mcount_loc[];
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define INIT_OPS_HASH(opsname) \
        .func_hash              = &opsname.local_hash,                  \
@@ -6126,11 +6131,11 @@ void __init ftrace_free_init_mem(void)
 
 void __init ftrace_init(void)
 {
-       extern unsigned long __start_mcount_loc[];
-       extern unsigned long __stop_mcount_loc[];
        unsigned long count, flags;
        int ret;
 
+    ftrace_early_boot_shutdown();
+
        local_irq_save(flags);
        ret = ftrace_dyn_arch_init();
        local_irq_restore(flags);
@@ -7095,3 +7100,288 @@ void ftrace_graph_exit_task(struct task_struct *t)
        kfree(ret_stack);
 }
 #endif
+
+
+#ifdef CONFIG_EARLY_BOOT_FUNCTION_TRACER
+
+#define EARLY_BOOT_BUFF_LEN ((1 << CONFIG_EARLY_BOOT_FTRACE_BUF_SHIFT) / \
+                                       sizeof(struct ftrace_early_boot_entry))
+
+struct ftrace_early_boot_entry {
+       unsigned long ip;
+       unsigned long parent_ip;
+       unsigned long long timestamp;
+};
+/*
+ * Only CPU0 is running in early stage, no need to have per-cpu buffer
+ */
+static struct ftrace_early_boot_entry 
ftrace_early_boot_entries[EARLY_BOOT_BUFF_LEN] __initdata;
+static const unsigned long EARLY_BOOT_BUFF_MAX __initconst = 
EARLY_BOOT_BUFF_LEN;
+static unsigned long early_boot_entries_count __initdata;
+static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
+static bool ftrace_early_boot_activated __initdata = false;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ftrace_func_t ftrace_early_boot_trace_function __read_mostly = ftrace_stub;
+#else
+# define ftrace_early_boot_trace_function ftrace_trace_function
+#endif
+
+inline bool __init is_ftrace_early_boot_activated(void)
+{
+       return ftrace_early_boot_activated;     /* set to true by ftrace_early_boot_init() */
+}
+
+static inline __init void ftrace_early_boot_disable(void)
+{
+       ftrace_early_boot_trace_function = ftrace_stub; /* the x86 function_hook in this patch just returns while the pointer equals ftrace_stub */
+}
+
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+#define EARLY_BOOT_FILTER_LEN ((1 << CONFIG_EARLY_BOOT_FTRACE_FILTER_SHIFT) / \
+                                       sizeof(unsigned long))
+
+struct ftrace_early_boot_filtering {
+       unsigned long list[EARLY_BOOT_FILTER_LEN];
+       char buf[COMMAND_LINE_SIZE];
+       int size;
+};
+
+static const unsigned long EARLY_BOOT_FILTER_MAX __initconst = 
EARLY_BOOT_FILTER_LEN;
+static struct ftrace_early_boot_filtering ftrace_data_notrace __initdata;
+static struct ftrace_early_boot_filtering ftrace_data_filter __initdata;
+
+/*
+ * Report whether @addr is already stored among the first *@size entries of
+ * @filter. Returns 1 when it is, 0 otherwise.
+ */
+static __init int ftrace_early_boot_filter_has_addr(unsigned long addr,
+       unsigned long *filter, int *size)
+{
+       int idx = *size;
+
+       /* walk the used part of the list from its last entry down to the first */
+       while (idx-- > 0) {
+               if (filter[idx] == addr)
+                       return 1;
+       }
+       return 0;
+}
+
+static __init int
+ftrace_early_boot_match_record(unsigned long ip, struct ftrace_glob *func_g)
+{
+       char str[KSYM_SYMBOL_LEN];      /* buffer handed to kallsyms_lookup(); presumably receives the symbol name for @ip */
+       char *modname;                  /* written by kallsyms_lookup(), not read afterwards */
+
+       kallsyms_lookup(ip, NULL, NULL, &modname, str);
+       return ftrace_match(str, func_g);       /* result of matching str against the caller's glob */
+}
+
+/*
+ * Append to @filter every mcount location whose symbol matches the glob
+ * @func, skipping addresses that are already present.
+ * @func:   glob taken from the boot parameter (parsed in place)
+ * @filter: list being filled, EARLY_BOOT_FILTER_MAX entries long
+ * @size:   in/out count of valid entries in @filter
+ *
+ * The scan stops as soon as the list is full.
+ */
+static __init void
+ftrace_early_boot_regex(char *func, unsigned long *filter, int *size)
+{
+       struct ftrace_glob func_g = { .type = MATCH_FULL };
+       unsigned long *start = __start_mcount_loc;
+       unsigned long *end = __stop_mcount_loc;
+       unsigned long count;
+       unsigned long addr;
+       unsigned long *p;
+       int clear_filter = 0;
+
+       count = end - start;
+
+       if (!count)
+               return;
+
+       if (func) {
+               func_g.type = filter_parse_regex(func, strlen(func),
+                                                &func_g.search,
+                                                &clear_filter);
+               func_g.len = strlen(func_g.search);
+       }
+
+       p = start;
+       while (p < end) {
+               addr = ftrace_call_adjust(*p++);
+               if (!addr)
+                       continue;
+
+               /*
+                * Valid indexes are 0 .. EARLY_BOOT_FILTER_MAX - 1, so stop
+                * once the list is full; testing with '>' would allow one
+                * more store past the end of the array.
+                */
+               if ((*size) >= EARLY_BOOT_FILTER_MAX)
+                       return;
+
+               if (ftrace_early_boot_match_record(addr, &func_g)) {
+                       if (!ftrace_early_boot_filter_has_addr(addr, filter,
+                                                              size))
+                               filter[(*size)++] = addr;
+               }
+       }
+}
+
+/*
+ * Three-way comparison of the two unsigned long values @a and @b point to.
+ * Returns 1, -1 or 0; used for both sort() and bsearch() on the filter lists.
+ */
+static __init int ftrace_addr_compare(const void *a, const void *b)
+{
+       unsigned long lhs = *(unsigned long *)a;
+       unsigned long rhs = *(unsigned long *)b;
+
+       if (lhs == rhs)
+               return 0;
+
+       return lhs > rhs ? 1 : -1;
+}
+
+/* Exchange the unsigned long values at @a and @b; @size is not consulted. */
+static __init void ftrace_addr_swap(void *a, void *b, int size)
+{
+       unsigned long *first = a;
+       unsigned long *second = b;
+       unsigned long saved = *first;
+
+       *first = *second;
+       *second = saved;
+}
+
+/*
+ * Build one early boot filter list from a comma-separated list of globs.
+ * @data: the struct ftrace_early_boot_filtering to fill
+ * @str:  value of the boot parameter; NULL when the parameter was given
+ *        without "=value"
+ *
+ * Returns 1 when the list was built and sorted, 0 when there was nothing
+ * to parse.
+ */
+static __init int set_ftrace_early_boot_filtering(void *data, char *str)
+{
+       struct ftrace_early_boot_filtering *ftrace_data = data;
+       char *func;
+       char *buf;
+
+       /* a bare parameter arrives with a NULL value; do not copy from it */
+       if (!ftrace_data || !str)
+               return 0;
+       buf = ftrace_data->buf;
+       strlcpy(buf, str, COMMAND_LINE_SIZE);
+
+       /* strsep() sets buf to NULL after handing out the last token */
+       while (buf) {
+               func = strsep(&buf, ",");
+               ftrace_early_boot_regex(func, ftrace_data->list,
+                                       &ftrace_data->size);
+       }
+       /* sort filter to use binary search on it */
+       sort(ftrace_data->list, ftrace_data->size,
+               sizeof(unsigned long), ftrace_addr_compare, ftrace_addr_swap);
+
+       return 1;
+}
+
+#define ftrace_early_boot_bsearch_addr(addr, data) bsearch(&addr, data.list,\
+       data.size, sizeof(unsigned long), ftrace_addr_compare)
+
+#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
+
+
+static __init void
+ftrace_function_early_boot_trace_call(unsigned long ip, unsigned long 
parent_ip,
+                       struct ftrace_ops *op, struct pt_regs *regs)
+{
+       struct ftrace_early_boot_entry *entry;
+
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+       if (ftrace_data_notrace.size &&
+                       ftrace_early_boot_bsearch_addr(ip, ftrace_data_notrace))
+               return; /* ip is on the notrace list */
+
+       if (ftrace_data_filter.size &&
+                       !ftrace_early_boot_bsearch_addr(ip, ftrace_data_filter))
+               return; /* a filter list exists and ip is not on it */
+#endif
+
+       if (early_boot_entries_count >= EARLY_BOOT_BUFF_MAX) {
+               /* stop tracing when buffer is full */
+               ftrace_early_boot_disable();
+               return;
+       }
+
+       entry = &ftrace_early_boot_entries[early_boot_entries_count++];
+       entry->ip = ip;
+       entry->parent_ip = parent_ip;
+       entry->timestamp = trace_clock_local(); /* replayed later by ftrace_early_boot_fill_ringbuffer() */
+}
+
+/*
+ * this will be used as __setup_param
+ */
+struct ftrace_early_boot_obs_param {
+       int (*setup_func)(void *data, char *str);
+       const char *str;
+       void *data;
+};
+static struct ftrace_early_boot_obs_param ftrace_early_boot_params[] 
__initdata = {
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+       {
+               .str = "ftrace_notrace",
+               .data = &ftrace_data_notrace,
+               .setup_func = set_ftrace_early_boot_filtering,
+       },
+       {
+               .str = "ftrace_filter",
+               .data = &ftrace_data_filter,
+               .setup_func = set_ftrace_early_boot_filtering,
+       },
+#endif
+};
+
+/*
+ * parse_args() callback: when @param names one of the entries in
+ * ftrace_early_boot_params, hand @val to that entry's setup function.
+ * Always returns 0, whether or not the parameter was recognized.
+ */
+static __init int ftrace_do_early_boot_param(char *param, char *val,
+                                const char *unused, void *arg)
+{
+       struct ftrace_early_boot_obs_param *entry = ftrace_early_boot_params;
+       int remaining = ARRAY_SIZE(ftrace_early_boot_params);
+
+       for (; remaining > 0; remaining--, entry++) {
+               if (strcmp(param, entry->str) != 0)
+                       continue;
+
+               /* only the first matching entry is used */
+               entry->setup_func(entry->data, val);
+               break;
+       }
+       return 0;
+}
+
+/*
+ * Return true when @command_line contains "ftrace=function" followed by a
+ * space or by the end of the string, so that "ftrace=function_graph" does
+ * not count but an option placed last on the command line does.
+ */
+static __init bool ftrace_early_boot_wants_function(const char *command_line)
+{
+       const char *opt = "ftrace=function";
+       const size_t len = strlen(opt);
+       const char *hit = command_line;
+
+       while ((hit = strstr(hit, opt)) != NULL) {
+               if (hit[len] == ' ' || hit[len] == '\0')
+                       return true;
+               hit += len;
+       }
+       return false;
+}
+
+/*
+ * Entry point called at the top of start_kernel().
+ * @command_line: the boot command line; it is copied before being parsed,
+ *                so the caller's string is left untouched
+ *
+ * When function tracing was requested, parse the early boot filter
+ * parameters and install the early boot trace callback.
+ */
+void __init ftrace_early_boot_init(char *command_line)
+{
+       /* proceed only if function tracing was enabled */
+       if (!ftrace_early_boot_wants_function(command_line))
+               return;
+
+       strlcpy(tmp_cmdline, command_line, COMMAND_LINE_SIZE);
+       parse_args("ftrace early boot options", tmp_cmdline, NULL, 0, 0, 0,
+               NULL, ftrace_do_early_boot_param);
+
+       ftrace_early_boot_activated = true;
+       /* After this point, we enable early function tracing */
+       ftrace_early_boot_trace_function =
+               ftrace_function_early_boot_trace_call;
+}
+
+/*
+ * Stop early boot tracing and log how many entries were recorded (and, with
+ * CONFIG_FTRACE_MCOUNT_RECORD, the sizes of the notrace and filter lists).
+ */
+void __init ftrace_early_boot_shutdown(void)
+{
+       /* Disable early tracing */
+       ftrace_early_boot_disable();
+
+       /* terminate each message with a newline so the log line is complete */
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+       pr_info("ftrace: early boot %lu entries, notrace=%d, filter=%d\n",
+               early_boot_entries_count,
+               ftrace_data_notrace.size,
+               ftrace_data_filter.size);
+#else
+       pr_info("ftrace: early boot %lu recorded entries\n",
+               early_boot_entries_count);
+#endif
+}
+
+/*
+ * Timestamp of the early boot entry currently being replayed. It is set by
+ * ftrace_early_boot_fill_ringbuffer() before each trace_function() call and
+ * will be passed to ringbuffer by early_boot_trace_clock
+ */
+static u64 early_timestamp __initdata;
+
+static __init u64 early_boot_trace_clock(void)
+{
+       return early_timestamp; /* reports the saved timestamp instead of reading a clock */
+}
+
+/*
+ * Replay the entries recorded during early boot into a trace array.
+ * @data: the struct trace_array whose ring buffer receives the events
+ *
+ * While replaying, the ring buffer clock is switched to
+ * early_boot_trace_clock() so that each event carries the timestamp saved
+ * when it was recorded; afterwards the clock is set to trace_clock_local.
+ */
+void __init ftrace_early_boot_fill_ringbuffer(void *data)
+{
+       struct ftrace_early_boot_entry *entry;
+       struct trace_array *tr = data;
+       unsigned long i;
+
+       /* nothing was recorded: leave the ring buffer and its clock alone */
+       if (!early_boot_entries_count)
+               return;
+
+       ring_buffer_set_clock(tr->trace_buffer.buffer, early_boot_trace_clock);
+
+       for (i = 0; i < early_boot_entries_count; i++) {
+               entry = &ftrace_early_boot_entries[i];
+               /* must be set before trace_function() asks the clock */
+               early_timestamp = entry->timestamp;
+               trace_function(tr, entry->ip, entry->parent_ip, 0, 0);
+       }
+
+       ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clock_local);
+}
+
+#endif /* CONFIG_EARLY_BOOT_FUNCTION_TRACER */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ff1c4b20cd0a..596ee5dcf108 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8649,3 +8649,44 @@ __init static int tracing_set_default_clock(void)
 }
 late_initcall_sync(tracing_set_default_clock);
 #endif
+
+#ifdef CONFIG_EARLY_BOOT_FUNCTION_TRACER
+/*
+ * The early boot tracer should be the second trace array added,
+ *
+ * Returns the first entry of ftrace_trace_arrays, or NULL when the list is
+ * empty.
+ * NOTE(review): the sentence above speaks of the "second" array, while the
+ * code picks ftrace_trace_arrays.next and warns unless that entry has
+ * TRACE_ARRAY_FL_GLOBAL set - confirm which array is meant to be returned.
+ */
+static __init struct trace_array *early_boot_trace_array(void)
+{
+       struct trace_array *tr;
+
+       if (list_empty(&ftrace_trace_arrays))
+               return NULL;
+
+       tr = list_entry(ftrace_trace_arrays.next,
+                       typeof(*tr), list);
+       WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL));
+       return tr;
+}
+
+static __init int early_boot_tracer_init_tracefs(void)
+{
+       struct trace_array *tr;
+
+       if (!is_ftrace_early_boot_activated())
+               return 0;       /* early boot tracing was never switched on */
+
+       if (instance_mkdir("early_boot"))
+               return 0;       /* non-zero result: skip the replay, still report success to the initcall */
+
+       tr = early_boot_trace_array();
+       if (!tr) {
+               pr_info("ftrace: early_boot array tracer not found\n");
+               return 0;
+       }
+       /* fill the ring buffer with early boot events */
+       ftrace_early_boot_fill_ringbuffer(tr);
+
+       return 0;
+}
+
+fs_initcall(early_boot_tracer_init_tracefs);
+#endif /* CONFIG_EARLY_BOOT_FUNCTION_TRACER */
-- 
2.17.1


Reply via email to