This patch addresses two issues related to Golang support on OSv.

Firstly, it provides a workaround for the issue described in #810.
In essence, some applications (such as the Golang runtime) invoke init
functions during initialization of their ELF object, and those functions
access variables in the TLS (Thread Local Storage) memory area by a static
offset determined during compilation. This type of TLS access is called
"initial exec" (or static) and is only supported on OSv if the shared
library runs on a new thread. To address this limitation, this patch
introduces changes that allow delaying ELF initialization until the new
thread is run.

Secondly, this patch changes the ELF initialization code so that argv
and other data can be passed to shared object init functions, as
explained in #795. This is required, for example, by the Golang runtime.

Fixes #795

The content of this patch was authored by Benoit Canet.

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>
---
 core/app.cc        | 115 +++++++++++++++++++++++++++++++++++++----------------
 core/elf.cc        |  63 ++++++++++++++++++++++-------
 include/osv/app.hh |  13 ++++++
 include/osv/elf.hh |  21 +++++++++-
 4 files changed, 161 insertions(+), 51 deletions(-)

diff --git a/core/app.cc b/core/app.cc
index c5d152b..d9a4c6a 100644
--- a/core/app.cc
+++ b/core/app.cc
@@ -169,7 +169,31 @@ application::application(const std::string& command,
         }
 
         merge_in_environ(new_program, env);
-        _lib = current_program->get_library(_command);
+        prepare_argv(current_program);
+        //
+        // Some applications (specifically Golang ones) during initialization 
of it's ELF object
+        // invoke init functions that access variables from TLS (Thread Local 
Storage) memory
+        // area by offset determined during compilation. This type of TLS 
access is called
+        // "initial exec" (or static) and is best explained by the Ulrich 
Drepper's paper -
+        // https://www.uclibc.org/docs/tls.pdf.
+        //
+        // In essence library code accesses thread-local variables located in 
application static
+        // TLS memory area that belongs to the current thread. The static TLS 
memory area is
+        // setup by thread::setup_tcb() during thread construction phase based 
on TLS templates
+        // of the corresponding application ELF object. The TLS templates are 
captured by
+        // object::init_static_tls() (called by program::get_library()) but 
only used when
+        // creating new threads to run the application code. So the TLS memory 
area of the current
+        // thread is not affected.
+        //
+        // Hopefully it is clear by now that the TLS memory area of the thread 
invoking this constructor,
+        // which typically is a parent to new application thread yet to be 
started, is not setup
+        // and therefore should not be accessed by init functions during ELF 
object initialization.
+        // In order to overcome this constraint, which by the way is well 
explained by the
+        // issue #810 and very hard to fix correctly, we need to delay 
initialization of the application
+        // ELF object and pass delay_init set to true (3rd argument) to the 
get_library method below.
+        // The ELF object will be initialized by explicitly calling 
program::init_library() from
+        // application::main() invoked by new thread later.
+        _lib = current_program->get_library(_command, {}, true);
     } catch (const launch_error &e) {
         throw;
     } catch (const std::exception &e) {
@@ -274,7 +298,12 @@ TRACEPOINT(trace_app_main_ret, "return_code=%d", int);
 void application::main()
 {
     __libc_stack_end = __builtin_frame_address(0);
-
+    //
+    // Explicitly initialize the application ELF object which would have been
+    // loaded earlier most likely by parent thread in application constructor.
+    // Effectively the ELF initialization has been delayed until this moment
+    // for reasons explained in application::application().
+    elf::get_program()->init_library(_args.size(), _argv.get());
     sched::thread::current()->set_name(_command);
 
     if (_main) {
@@ -292,65 +321,81 @@ void application::main()
     // _entry_point() doesn't return
 }
 
-void application::run_main(std::string path, int argc, char** argv)
+void application::prepare_argv(elf::program *program)
 {
-    char *c_path = (char *)(path.c_str());
-    // path is guaranteed to keep existing this function
+    // Prepare program_* variable used by the libc
+    char *c_path = (char *)(_command.c_str());
     program_invocation_name = c_path;
     program_invocation_short_name = basename(c_path);
 
-    unsigned sz = argc; // for the trailing 0's.
-    for (int i = 0; i < argc; ++i) {
-        sz += strlen(argv[i]);
+    // Allocate a continuous buffer for arguments: _argv_buf
+    // First count the trailing zeroes
+    auto sz = _args.size();
+    // Then add the sum of each argument size to sz
+    for (auto &str: _args) {
+        sz += str.size();
     }
+    _argv_buf.reset(new char[sz]);
 
-    std::unique_ptr<char []> argv_buf(new char[sz]);
-    char *ab = argv_buf.get();
     // In Linux, the pointer arrays argv[] and envp[] are continguous.
     // Unfortunately, some programs rely on this fact (e.g., libgo's
     // runtime_goenvs_unix()) so it is useful that we do this too.
+
+    // First count the number of environment variables
     int envcount = 0;
     while (environ[envcount]) {
         envcount++;
     }
-    char *contig_argv[argc + 1 + envcount + 1];
 
-    for (int i = 0; i < argc; ++i) {
-        size_t asize = strlen(argv[i]);
-        memcpy(ab, argv[i], asize);
-        ab[asize] = '\0';
+    // Allocate the continuous buffer for argv[] and envp[]
+    _argv.reset(new char*[_args.size() + 1 + envcount + 1 + 
sizeof(Elf64_auxv_t) * 3]);
+
+    // Fill the argv part of these buffers
+    char *ab = _argv_buf.get();
+    char **contig_argv = _argv.get();
+    for (size_t i = 0; i < _args.size(); i++) {
+       auto &str = _args[i];
+        memcpy(ab, str.c_str(), str.size());
+        ab[str.size()] = '\0';
         contig_argv[i] = ab;
-        ab += asize + 1;
+        ab += str.size() + 1;
     }
-    contig_argv[argc] = nullptr;
+    contig_argv[_args.size()] = nullptr;
 
+    // Do the same for environ
     for (int i = 0; i < envcount; i++) {
-        contig_argv[argc + 1 + i] = environ[i];
+        contig_argv[_args.size() + 1 + i] = environ[i];
     }
-    contig_argv[argc + 1 + envcount] = nullptr;
+    contig_argv[_args.size() + 1 + envcount] = nullptr;
 
-    // make sure to have a fresh optind across calls
-    // FIXME: fails if run() is executed in parallel
-    int old_optind = optind;
-    optind = 0;
-    _return_code = _main(argc, contig_argv);
-    optind = old_optind;
+    _libvdso = program->get_library("libvdso.so");
+    if (!_libvdso) {
+        abort("could not load libvdso.so\n");
+    }
+
+    // Pass the VDSO library to the application.
+    Elf64_auxv_t* _auxv =
+        reinterpret_cast<Elf64_auxv_t *>(&contig_argv[_args.size() + 1 + 
envcount + 1]);
+    _auxv[0].a_type = AT_SYSINFO_EHDR;
+    _auxv[0].a_un.a_val = reinterpret_cast<uint64_t>(_libvdso->base());
+
+    _auxv[1].a_type = AT_PAGESZ;
+    _auxv[1].a_un.a_val = sysconf(_SC_PAGESIZE);
+
+    _auxv[2].a_type = AT_NULL;
+    _auxv[2].a_un.a_val = 0;
 }
 
 void application::run_main()
 {
     trace_app_main(this, _command.c_str());
 
-    // C main wants mutable arguments, so we have can't use strings directly
-    std::vector<std::vector<char>> mut_args;
-    transform(_args, back_inserter(mut_args),
-            [](std::string s) { return std::vector<char>(s.data(), s.data() + 
s.size() + 1); });
-    std::vector<char*> argv;
-    transform(mut_args.begin(), mut_args.end(), back_inserter(argv),
-            [](std::vector<char>& s) { return s.data(); });
-    auto argc = argv.size();
-    argv.push_back(nullptr);
-    run_main(_command, argc, argv.data());
+    // make sure to have a fresh optind across calls
+    // FIXME: fails if run() is executed in parallel
+    int old_optind = optind;
+    optind = 0;
+    _return_code = _main(_args.size(), _argv.get());
+    optind = old_optind;
 
     if (_return_code) {
         debug("program %s returned %d\n", _command.c_str(), _return_code);
diff --git a/core/elf.cc b/core/elf.cc
index 2a46633..1ec841f 100644
--- a/core/elf.cc
+++ b/core/elf.cc
@@ -937,19 +937,21 @@ std::string object::pathname()
 }
 
 // Run the object's static constructors or similar initialization
-void object::run_init_funcs()
+void object::run_init_funcs(int argc, char** argv)
 {
+    // Invoke any init functions if present and pass in argc and argv
+    // The reason why we pass argv and argc is explained in issue #795
     if (dynamic_exists(DT_INIT)) {
         auto func = dynamic_ptr<void>(DT_INIT);
         if (func) {
-            reinterpret_cast<void(*)()>(func)();
+            reinterpret_cast<void(*)(int, char**)>(func)(argc, argv);
         }
     }
     if (dynamic_exists(DT_INIT_ARRAY)) {
-        auto funcs = dynamic_ptr<void (*)()>(DT_INIT_ARRAY);
+        auto funcs = dynamic_ptr<void(*)(int, char**)>(DT_INIT_ARRAY);
         auto nr = dynamic_val(DT_INIT_ARRAYSZ) / sizeof(*funcs);
         for (auto i = 0u; i < nr; ++i) {
-            funcs[i]();
+            funcs[i](argc, argv);
         }
     }
 }
@@ -1176,27 +1178,60 @@ program::load_object(std::string name, 
std::vector<std::string> extra_path,
 }
 
 std::shared_ptr<object>
-program::get_library(std::string name, std::vector<std::string> extra_path)
+program::get_library(std::string name, std::vector<std::string> extra_path, 
bool delay_init)
 {
     SCOPE_LOCK(_mutex);
+    //
+    // Shared library needs to be initialized before any of its symbols 
(function or variable)
+    // is accessed. The initialization involves invoking so called init 
functions and is handled by
+    // the init_library() method below. The parameter delay_init determines 
whether init_library is
+    // called right away or at some arbitrary time later.
+    //
+    // Because init_library() needs to access the library object itself and 
it's dependencies possibly
+    // later we push the loaded objects list on a _loaded_objects_stack member 
variable of the program.
+    //
+    // Since a library can load another one like java.so does in OSv, we want 
a stack
+    // structure so each init_library call gets it's corresponding list of 
objects to operate on.
+    //
     std::vector<std::shared_ptr<object>> loaded_objects;
     auto ret = load_object(name, extra_path, loaded_objects);
+    _loaded_objects_stack.push(loaded_objects);
+
     if (ret) {
         ret->init_static_tls();
     }
-    // After loading the object and all its needed objects, run these objects'
-    // init functions in reverse order (so those of deepest needed object runs
-    // first) and finally make the loaded objects visible in search order.
-    auto size = loaded_objects.size();
-    for (int i = size - 1; i >= 0; i--) {
-        loaded_objects[i]->run_init_funcs();
-    }
-    for (unsigned i = 0; i < size; i++) {
-        loaded_objects[i]->setprivate(false);
+
+    if (!delay_init) {
+        init_library();
     }
+
     return ret;
 }
 
+void program::init_library(int argc, char** argv)
+{
+    // Get the list of pointers to shared objects from stack before iterating 
on them
+    if(!_loaded_objects_stack.empty()) {
+        std::vector<std::shared_ptr<object>> loaded_objects =
+                _loaded_objects_stack.top();
+        //
+        // After loading the object and all its needed objects, run these 
objects'
+        // init functions in reverse order (so those of deepest needed object 
runs
+        // first) and finally make the loaded objects visible in search order.
+        auto size = loaded_objects.size();
+        for (unsigned i = 0; i < size; i++) {
+            loaded_objects[i]->setprivate(true);
+        }
+        for (int i = size - 1; i >= 0; i--) {
+            loaded_objects[i]->run_init_funcs(argc, argv);
+        }
+        for (unsigned i = 0; i < size; i++) {
+            loaded_objects[i]->setprivate(false);
+        }
+        _loaded_objects_stack.pop();
+    }
+}
+
 void program::remove_object(object *ef)
 {
     SCOPE_LOCK(_mutex);
diff --git a/include/osv/app.hh b/include/osv/app.hh
index 1a39e32..73a1b38 100644
--- a/include/osv/app.hh
+++ b/include/osv/app.hh
@@ -20,6 +20,10 @@
 #include <unordered_map>
 #include <string>
 
+#include "musl/include/elf.h"
+#undef AT_UID // prevent collisions
+#undef AT_GID
+
 extern "C" void __libc_start_main(int(*)(int, char**), int, char**, void(*)(),
     void(*)(), void(*)(), void*);
 
@@ -203,6 +207,7 @@ private:
     void start_and_join(waiter* setup_waiter);
     void main();
     void run_main(std::string path, int argc, char** argv);
+    void prepare_argv(elf::program *program);
     void run_main();
     friend void ::__libc_start_main(int(*)(int, char**), int, char**, 
void(*)(),
         void(*)(), void(*)(), void*);
@@ -219,10 +224,18 @@ private:
     mutex _termination_mutex;
     std::shared_ptr<elf::object> _lib;
     std::shared_ptr<elf::object> _libenviron;
+    std::shared_ptr<elf::object> _libvdso;
     main_func_t* _main;
     void (*_entry_point)();
     static app_registry apps;
 
+    // _argv is set by prepare_argv() called from the constructor and needs to 
be
+    // retained as member variable so that it later can be passed as argument 
by either
+    // application::main and application::run_main() or 
application::run_main() called
+    // from __libc_start_main()
+    std::unique_ptr<char *[]> _argv;
+    std::unique_ptr<char []> _argv_buf; // actual arguments content _argv 
points to
+
     // Must be destroyed before _lib, because contained function objects may
     // have destructors which are part of the application's code.
     std::list<std::function<void()>> _termination_request_callbacks;
diff --git a/include/osv/elf.hh b/include/osv/elf.hh
index fabc6e6..e0966c4 100644
--- a/include/osv/elf.hh
+++ b/include/osv/elf.hh
@@ -11,6 +11,7 @@
 #include "fs/fs.hh"
 #include <vector>
 #include <map>
+#include <stack>
 #include <memory>
 #include <unordered_set>
 #include <osv/types.h>
@@ -334,7 +335,7 @@ public:
     const std::vector<Elf64_Phdr> *phdrs();
     std::string soname();
     std::string pathname();
-    void run_init_funcs();
+    void run_init_funcs(int argc, char** argv);
     void run_fini_funcs();
     template <typename T = void>
     T* lookup(const char* name);
@@ -523,9 +524,22 @@ public:
      * \param[in] extra_path  Additional directories to search in addition to
      *                        the default search path which is set with
      *                        set_search_path().
+     * \param[in] delay_init  If true the init functions in the library and its
+     *                        dependencies will not be executed until some 
later
+     *                        time when the init_library() is called. By 
default
+     *                        the init functions are executed right away.
      */
     std::shared_ptr<elf::object>
-    get_library(std::string lib, std::vector<std::string> extra_path = {});
+    get_library(std::string lib, std::vector<std::string> extra_path = {}, 
bool delay_init = false);
+
+    /**
+     * Execute init functions of the library itself and its dependencies.
+     *
+     * Any arguments passed in are relayed to the init functions. Right now
+     * the only place that explicitly invokes init_library is 
application::main()
+     * method which also passes any argv passed to the application.
+     */
+    void init_library(int argc = 0, char **argv = nullptr);
 
     /**
      * Set the default search path for get_library().
@@ -596,6 +610,9 @@ private:
 
     friend elf::file::~file();
     friend class object;
+    // this allows the objects resolved by get_library() get initialized
+    // by init_library() at arbitrary time later - the delayed initialization 
scenario
+    std::stack<std::vector<std::shared_ptr<object>>> _loaded_objects_stack;
 };
 
 void create_main_program();
-- 
2.7.4

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to