From: Waldemar Kozaczuk <[email protected]>
Committer: WALDEMAR KOZACZUK <[email protected]>
Branch: master

Add strace-like functionality

This PR implements simple strace-like mechanism described by #1263.

In essence, it adds a new structure trace_log that holds a fixed size (64K)
array of pointers to trace_records and acts as a ring buffer between threads
capturing active tracepoints (producers) and new thread strace (single consumer)
that prints them to the console. The write pointer - write_offset - is atomic 
that
rolls over every 64K and the strace thread follows it and tries to print
each trace (see print_trace).

The strace can be activated by adding "--strace" kernel command line argument.

./scripts/firecracker.py -e '--trace=vfs* --strace /hello'
OSv v0.57.0-75-gb7150705
init              0  0.001666500 vfs_mkdir("/dev" 0755)
init              0  0.001685758 vfs_mkdir_ret()
init              0  0.001698722 vfs_dup(0)
init              0  0.001699344 vfs_dup_ret(1)
init              0  0.001699520 vfs_dup(0)
init              0  0.001699644 vfs_dup_ret(2)
>init             0  0.006818073 vfs_mkdir("/rofs" 0755)
>init             0  0.006821609 vfs_mkdir_ret()
Booted up in 13.77 ms
Cmdline: /hello
>init             0  0.008424749 vfs_open("/usr/lib/fs" 0x0 00)
Hello from C code
>init             0  0.008429978 vfs_open_err(2)
>init             0  0.008435063 vfs_open("/etc/fstab" 0x0 00)
>init             0  0.008438018 vfs_open_ret(3)
>init             0  0.008531832 vfs_open("/dev" 0x0 00)
>init             0  0.008534193 vfs_open_ret(4)
>init             0  0.008534456 vfs_close(4)
>init             0  0.008537971 vfs_close_ret()
>init             0  0.008554489 vfs_open("/proc" 0x0 00)
>init             0  0.008556158 vfs_open_ret(4)
>init             0  0.008556252 vfs_close(4)
>init             0  0.008556763 vfs_close_ret()
>init             0  0.008583356 vfs_open("/sys" 0x0 00)
>init             0  0.008585037 vfs_open_ret(4)
>init             0  0.008585171 vfs_close(4)
>init             0  0.008585676 vfs_close_ret()
>init             0  0.008605026 vfs_open("/tmp" 0x0 00)
>init             0  0.008606443 vfs_open_ret(4)
>init             0  0.008606579 vfs_close(4)
>init             0  0.008607093 vfs_close_ret()
>init             0  0.008619345 vfs_close(3)
>init             0  0.008620198 vfs_close_ret()
>init             0  0.008634389 vfs_ioctl(1 0x5401)
>init             0  0.008634974 vfs_ioctl_ret()
>init             0  0.008635317 vfs_pwritev(1 0x200000501690 0x2 0xffffffff)
>init             0  0.008953023 vfs_pwritev_ret(0x16)
>init             0  0.008954858 vfs_pwritev(1 0x200000501660 0x2 0xffffffff)
>init             0  0.009209918 vfs_pwritev_ret(0x11)
>init             0  0.009235297 vfs_open("/init" 0x0 00)
>init             0  0.009237413 vfs_open_err(2)
>init             0  0.009259309 vfs_lstat(pathname=/hello, stat=0x2000004fe5e0)
>init             0  0.009265126 vfs_lstat_ret()
>init             0  0.009265804 vfs_open("/hello" 0x0 00)
>init             0  0.009267528 vfs_open_ret(3)
>init             0  0.009267772 vfs_close(3)
>init             0  0.009268057 vfs_close_ret()
/hello            0  0.009949201 vfs_pwritev(1 0x200000601e90 0x2 0xffffffff)
/hello            0  0.011528630 vfs_pwritev_ret(0x12)

Fixes #1263

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1045,6 +1045,7 @@ objects += core/mmio.o
 objects += core/kprintf.o
 objects += core/trace.o
 objects += core/trace-count.o
+objects += core/strace.o
 objects += core/callstack.o
 objects += core/poll.o
 objects += core/select.o
diff --git a/core/strace.cc b/core/strace.cc
--- a/core/strace.cc
+++ b/core/strace.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2023 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#include <osv/strace.hh>
+#include <osv/sched.hh>
+#include "drivers/console.hh"
+
+trace_log* _trace_log = nullptr;
+
+static void print_trace(trace_record* tr) {
+    char msg[512];
+    auto tp = tr->tp;
+    float time = tr->time;
+    std::string thread_name = tr->thread_name.data();
+
+    auto len = snprintf(msg, 512, "%-15s %3d %12.9f %s(", thread_name.c_str(), 
tr->cpu, time / 1000000000, tp->name);
+    auto left = 512 - len;
+    auto m = msg + len;
+
+    auto fmt = tp->format;
+    //Copy all up to 1st '%'
+    while (*fmt && *fmt != '%' && left > 0) {
+       *m++ = *fmt++;
+       left--;
+       len++;
+    }
+
+    auto buf = tr->buffer;
+    auto sig = tr->tp->sig;
+    int written = 0;
+
+    if (tr->backtrace) {
+        buf += tracepoint_base::backtrace_len * sizeof(void*);
+    }
+
+    while (*sig != 0 && left > 2) {
+        //Copy fragment of tp->format up to next '%'
+        char _fmt[128];
+        int i = 0;
+        do {
+           _fmt[i++] = *fmt++;
+        } while (*fmt && *fmt != '%');
+        _fmt[i] = 0;
+
+        //Detect type of data, deserialize and print to the msg
+        switch (*sig++) {
+        case 'c':
+            buf = align_up(buf, object_serializer<char>().alignment());
+            written = snprintf(m, left, _fmt,  
*reinterpret_cast<char*>(buf++));
+            break;
+        case 'b':
+        case 'B':
+            buf = align_up(buf, object_serializer<u8>().alignment());
+            written = snprintf(m, left, _fmt,  *buf++);
+            break;
+        case 'h':
+        case 'H':
+            buf = align_up(buf, object_serializer<u16>().alignment());
+            written = snprintf(m, left, _fmt,  *reinterpret_cast<u16*>(buf));
+            buf += sizeof(u16);
+            break;
+        case 'i':
+        case 'I':
+        case 'f':
+            buf = align_up(buf, object_serializer<u32>().alignment());
+            written = snprintf(m, left, _fmt,  *reinterpret_cast<u32*>(buf));
+            buf += sizeof(u32);
+            break;
+        case 'q':
+        case 'Q':
+        case 'd':
+        case 'P':
+            buf = align_up(buf, object_serializer<u64>().alignment());
+            written = snprintf(m, left, _fmt,  *reinterpret_cast<u64*>(buf));
+            buf += sizeof(u64);
+            break;
+        case '?':
+            written = snprintf(m, left, _fmt,  
*reinterpret_cast<bool*>(buf++));
+            break;
+        case 'p': {
+            //string
+            char str[128];
+            auto slen = *buf++;
+            int i = 0;
+            while (slen-- && i < 127) {
+                str[i++] = *reinterpret_cast<char*>(buf++);
+            }
+            str[i] = 0;
+            written = snprintf(m, left, _fmt, str);
+            buf += (object_serializer<const char*>::max_len - i - 1);
+            break;
+        }
+        case '*': {
+            //binary data
+            buf = align_up(buf, sizeof(u16));
+            char str[256];
+            auto slen = *reinterpret_cast<u16*>(buf);
+            int i = 0;
+            buf += 2;
+            str[i++] = '{';
+            while (slen-- && (i + 4) < 255) {
+                auto byte = *reinterpret_cast<u8*>(buf++);
+                auto high = byte / 16;
+                str[i++] = high < 10 ? '0' + high : 'a' + (high - 10);
+                auto low = byte % 16;
+                str[i++] = low < 10 ? '0' + low : 'a' + (low - 10);
+                str[i++] = ' ';
+            }
+            str[i++] = '}';
+            str[i] = 0;
+            written = snprintf(m, left, _fmt, str);
+            break;
+        }
+        default:
+            assert(0 && "should not reach");
+        }
+
+        left -= written;
+        len += written;
+        m += written;
+    }
+    *m++ = ')';
+    *m++ = '\n';
+    console::write(msg, len + 2);
+}
+
+static sched::thread *strace = nullptr;
+
+void start_strace() {
+    _trace_log = new trace_log();
+    strace = sched::thread::make([] {
+        while (true) {
+            while (auto tr = _trace_log->read()) {
+                print_trace(tr);
+            }
+            sched::thread::sleep(std::chrono::microseconds(100));
+        }
+    }, sched::thread::attr().name("strace"));
+
+    strace->start();
+}
diff --git a/core/trace.cc b/core/trace.cc
--- a/core/trace.cc
+++ b/core/trace.cc
@@ -27,6 +27,7 @@
 #include <osv/semaphore.hh>
 #include <osv/elf.hh>
 #include <cxxabi.h>
+#include "drivers/console.hh"
 
 using namespace std;
 
@@ -186,6 +187,14 @@ void enable_backtraces(bool backtrace) {
     }
 }
 
+void list_all_tracepoints() {
+    char buf[128];
+    for (auto& tp : tracepoint_base::tp_list) {
+        auto len = snprintf(buf, 128, "Tracepoint %s @ %p\n", tp.name, &tp);
+        console::write(buf, len);
+    }
+}
+
 namespace std {
 
 template<>
diff --git a/include/osv/strace.hh b/include/osv/strace.hh
--- a/include/osv/strace.hh
+++ b/include/osv/strace.hh
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2023 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+#ifndef STRACE_HH_
+#define STRACE_HH_
+
+#include <osv/trace.hh>
+
+void start_strace();
+
+#endif
diff --git a/include/osv/trace.hh b/include/osv/trace.hh
--- a/include/osv/trace.hh
+++ b/include/osv/trace.hh
@@ -23,10 +23,13 @@
 #include <arch.hh>
 #include <osv/rcu.hh>
 #include <safe-ptr.hh>
+#include <stdint.h>
+#include <atomic>
 
 void enable_trace();
 void enable_tracepoint(std::string wildcard);
 void enable_backtraces(bool = true);
+void list_all_tracepoints();
 
 class tracepoint_base;
 
@@ -48,6 +51,40 @@ struct trace_record {
     };
 };
 
+//Simple lock-less multiple-producer single-consumer structure
+//designed to act as a data gateway between threads generating trace
+//records and the strace thread printing them to the console
+//
+//In essence it is an array of pointers to the trace records
+//indexed by write_offset which stores next entry offset to write and
+//is atomic to guarantee no two producers step on each other and
+//read_offset that stores offset of next entry to read
+//
+//The trace_log is designed as a circular buffer where
+//both read and write offsets would wrap around from 0xffff to 0
+//so it is possible that with huge number of trace record written
+//and slow consumer some data may get overwritten
+constexpr const size_t trace_log_size = 0x10000;
+struct trace_log {
+    trace_record *traces[trace_log_size];
+    std::atomic<uint16_t> write_offset = {0};
+    uint16_t read_offset = {0};
+
+    void write(trace_record* tr) {
+        traces[write_offset.fetch_add(1)] = tr;
+    }
+
+    trace_record* read() {
+        if (read_offset == write_offset.load()) {
+            return nullptr;
+        } else {
+            return traces[read_offset++];
+        }
+    }
+};
+
+extern trace_log* _trace_log;
+
 template <size_t idx, size_t N, typename... args>
 struct tuple_formatter
 {
@@ -369,6 +406,9 @@ public:
         serialize(buffer, as);
         barrier();
         tr->tp = this; // do this last to indicate the record is complete
+        if (_trace_log) {
+            _trace_log->write(tr);
+        }
     }
     void serialize(void* buffer, std::tuple<s_args...> as) {
         serializer<0, sizeof...(s_args), s_args...>::write(buffer, 0, as);
diff --git a/loader.cc b/loader.cc
--- a/loader.cc
+++ b/loader.cc
@@ -31,6 +31,7 @@
 #include "arch.hh"
 #include "arch-setup.hh"
 #include "osv/trace.hh"
+#include "osv/strace.hh"
 #include <osv/power.hh>
 #include <osv/rcu.hh>
 #include <osv/mempool.hh>
@@ -150,6 +151,8 @@ static bool opt_leak = false;
 static bool opt_noshutdown = false;
 bool opt_power_off_on_abort = false;
 static bool opt_log_backtrace = false;
+static bool opt_list_tracepoints = false;
+static bool opt_strace = false;
 static bool opt_mount = true;
 static bool opt_pivot = true;
 static std::string opt_rootfs;
@@ -179,6 +182,8 @@ static void usage()
     std::cout << "  --sampler=arg         start stack sampling profiler\n";
     std::cout << "  --trace=arg           tracepoints to enable\n";
     std::cout << "  --trace-backtrace     log backtraces in the tracepoint 
log\n";
+    std::cout << "  --trace-list          list available tracepoints\n";
+    std::cout << "  --strace              start a thread to print tracepoints 
to the console on the fly\n";
     std::cout << "  --leak                start leak detector after boot\n";
     std::cout << "  --nomount             don't mount the root file system\n";
     std::cout << "  --nopivot             do not pivot the root from bootfs to 
the root fs\n";
@@ -260,6 +265,10 @@ static void parse_options(int loader_argc, char** 
loader_argv)
         opt_log_backtrace = true;
     }
 
+    if (extract_option_flag(options_values, "trace-list")) {
+        opt_list_tracepoints = true;
+    }
+
     if (extract_option_flag(options_values, "verbose")) {
         opt_verbose = true;
         enable_verbose();
@@ -283,6 +292,9 @@ static void parse_options(int loader_argc, char** 
loader_argv)
                 enable_tracepoint(t);
             }
         }
+        if (extract_option_flag(options_values, "strace")) {
+            opt_strace = true;
+        }
     }
 
     opt_mount = !extract_option_flag(options_values, "nomount");
@@ -723,12 +735,19 @@ void main_cont(int loader_argc, char** loader_argv)
         poweroff();
     }
 
+    if (opt_list_tracepoints) {
+        list_all_tracepoints();
+    }
+
     enable_trace();
     if (opt_log_backtrace) {
         // can only do this after smp_launch, otherwise the IDT is not 
initialized,
         // and backtrace_safe() fails as soon as we get an exception
         enable_backtraces();
     }
+    if (opt_strace) {
+        start_strace();
+    }
     sched::init_detached_threads_reaper();
     elf::setup_missing_symbols_detector();
 

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/00000000000040485c060954e080%40google.com.

Reply via email to