loader_parse_cmdline accepts input str - OSv commandline, say:
--env=AA=aa --env=BB=bb1\ bb2 app.so arg1 arg2

The loader options are parsed and saved into argv, up to first not-loader-option
token. argc is set to number of loader options.
app_cmdline is set to unconsumed part of input str.

Note: quoting loader options with space is not supported.
This is reason to left commandline beyond loader options unmodified,
so it can be parsed later, by code with support for quoting.

Signed-off-by: Justin Cinkelj <justin.cink...@xlab.si>
---
 core/commands.cc        | 113 ++++++++++++++++++++++++++
 include/osv/commands.hh |   1 +
 tests/tst-commands.cc   | 211 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 325 insertions(+)

diff --git a/core/commands.cc b/core/commands.cc
index 6287d76..22024fa 100644
--- a/core/commands.cc
+++ b/core/commands.cc
@@ -318,6 +318,119 @@ std::string getcmdline()
     return std::string(osv_cmdline);
 }
 
+#define my_debug if(0) printf
+/*
+loader_parse_cmdline accepts input str - OSv commandline, say:
+--env=AA=aa --env=BB=bb1\ bb2 app.so arg1 arg2
+
+The loader options are parsed and saved into argv, up to first 
not-loader-option
+token. argc is set to number of loader options.
+app_cmdline is set to unconsumed part of input str.
+Example output:
+argc = 2
+argv[0] = "--env=AA=aa"
+argv[1] = "--env=BB=bb1 bb2"
+argv[2] = NULL
+app_cmdline = "app.so arg1 arg2"
+
+The whitespaces can be escaped with '\' to allow options with spaces.
+Notes:
+ - _quoting_ loader options with space is not supported.
+ - input str is modified.
+ - output argv has to be free-d by caller.
+*/
+void loader_parse_cmdline(char* str, int *pargc, char*** pargv, char** 
app_cmdline) {
+    *pargv = nullptr;
+    *pargc = 0;
+    *app_cmdline = nullptr;
+
+    const char *delim = " \t\n";
+    char esc = '\\';
+
+    // parse string
+    char *ap;
+    char *ap0=nullptr, *apE=nullptr; // first and last token.
+    int ntoken = 0;
+    ap0 = nullptr;
+    while(1) {
+        // Did we already consume all loader options?
+        // Look at first non-space char - if =='-', than this is loader option.
+        // Otherwise, it is application command.
+        char *ch = str;
+        while (ch && *ch != '\0') {
+            if (strchr(delim, *ch)) {
+                ch++;
+                continue;
+            }
+            else if (*ch == '-') {
+                // this is a loader option, continue with loader parsing
+                break;
+            }
+            else {
+                // ch is not space or '-', it is start of application command
+                // Save current position and stop loader parsing.
+                *app_cmdline = str;
+                break;
+            }
+        }
+        if (*ch == '\0') {
+            // empty str, contains only spaces
+            *app_cmdline = str;
+        }
+        if (*app_cmdline) {
+            break;
+        }
+        // there are loader options, continue with parsing
+
+        ap = stresep(&str, delim, esc);
+        assert(ap);
+
+        my_debug("  ap = %p %s, *ap=%d\n", ap, ap, *ap);
+        if (*ap != '\0') {
+            // valid token found
+            ntoken++;
+            if (ap0 == nullptr) {
+                ap0 = ap;
+            }
+            apE = ap;
+        }
+        else {
+            // Multiple consecutive delimiters found. Stresep will write 
multiple
+            // '\0' into str. Squash them into one, so that argv will be 
'nice',
+            // in memory consecutive array of C strings.
+            if (str) {
+                my_debug("    shift   str %p '%s' <- %p '%s'\n", str-1, str-1, 
str, str);
+                memmove(str-1, str, strlen(str) + 1);
+                str--;
+            }
+        }
+        if (str == nullptr) {
+            // end of string, last char was delimiter
+            *app_cmdline = ap + strlen(ap); // make app_cmdline valid pointer 
to '\0'.
+            my_debug("    make app_cmdline valid pointer to '\\0' ap=%p '%s', 
app_cmdline=%p '%s'\n",
+                ap, ap, app_cmdline, app_cmdline);
+            break;
+        }
+
+    }
+    my_debug("  ap0 = %p '%s', apE = %p '%s', ntoken = %d, app_cmdline=%p 
'%s'\n",
+        ap0, ap0, apE, apE, ntoken, *app_cmdline, *app_cmdline);
+    *pargv = (char**)malloc(sizeof(char*) * (ntoken+1));
+    // str was modified, tokes are separated by exactly one '\0'
+    int ii;
+    for(ap = ap0, ii = 0; ii < ntoken; ap += strlen(ap)+1, ii++) {
+        assert(ap != nullptr);
+        assert(*ap != '\0');
+        my_debug("  argv[%d] = %p %s\n", ii, ap, ap);
+        (*pargv)[ii] = ap;
+    }
+    my_debug("  ntoken = %d, ii = %d\n", ntoken, ii);
+    assert(ii == ntoken);
+    (*pargv)[ii] = nullptr;
+    *pargc = ntoken;
+}
+#undef my_debug
+
 int parse_cmdline(const char *p)
 {
     char* save;
diff --git a/include/osv/commands.hh b/include/osv/commands.hh
index be3aca8..0693d28 100644
--- a/include/osv/commands.hh
+++ b/include/osv/commands.hh
@@ -25,6 +25,7 @@ parse_command_line(const std::string line, bool &ok);
 std::string getcmdline();
 int parse_cmdline(const char *p);
 void save_cmdline(std::string newcmd);
+void loader_parse_cmdline(char* str, int *pargc, char*** pargv, char** 
app_cmdline);
 }
 
 #endif // !__OSV_COMMANDS_HH__
diff --git a/tests/tst-commands.cc b/tests/tst-commands.cc
index ea0cc07..78f4155 100644
--- a/tests/tst-commands.cc
+++ b/tests/tst-commands.cc
@@ -11,6 +11,7 @@
 #include <osv/commands.hh>
 #include <fstream>
 #include <map>
+#include <string.h>
 
 static int tests = 0, fails = 0;
 
@@ -875,8 +876,218 @@ static bool 
test_runscript_with_conditional_env_in_script(bool set_env_vars_befo
     return true;
 }
 
+bool test_loader_parse_cmdline(const char* instr, std::vector<std::string> 
ref_argv, const char* ref_app_cmdline) {
+    char *str, *str_to_be_freed;
+    // strdup alternative code might catch read beyond end of str string.
+    // The strdup above might contains \0 beyond terminating '\0', so say
+    // strlen(str+strlen(str) + 1) still returns 0, or some random number
+    // instead of scanning random garbage.
+#if 0
+    *str = strdup(instr);
+    str_to_be_freed = str;
+#else
+    int str_length = std::max(strlen(instr), 1024ul);
+    str = (char*)malloc(str_length*9);
+    str_to_be_freed = str;
+    memset(str, 'X', str_length*9);
+    str += str_length*4;
+    strcpy(str, instr);
+#endif
+
+    int argc;
+    char** argv;
+    char *app_cmdline;
+
+    //printf("/*-------------------------------------*/\n");
+    //printf("str = %p '%s'\n", str, str);
+    osv::loader_parse_cmdline(str, &argc, &argv, &app_cmdline);
+
+    // print and check result
+    char **ch;
+    int ii;
+    int old_len = 0;
+    if (argc != (int)ref_argv.size()) {
+        return false;
+    }
+    for (ii = 0, ch = argv; ch != nullptr && *ch != nullptr; ii++, ch++) {
+        //printf("  argv[%d] = %p '%s', expected = '%s'\n", ii, *ch, *ch, 
ref_argv[ii].c_str());
+        if (ref_argv[ii] != argv[ii]) {
+            return false;
+        }
+        if(ii>0) {
+            // check that argv strings are consecutive in memory
+            // not really needed for loader options, but common implementation
+            // detail for Linux app main(argc, argv).
+            if ((argv[ii-1] + old_len) != argv[ii]) {
+                return false;
+            }
+        }
+        old_len = strlen(argv[ii]) + 1; // num of bytes including terminating 
null.
+    }
+
+    //printf("  ii = %d, ref_argv.size()=%d\n", ii, (int)ref_argv.size());
+    if (ii != argc) {
+        return false;
+    }
+    //printf("  app_cmdline=%p '%s', expected = '%s'\n", app_cmdline, 
app_cmdline, ref_app_cmdline);
+    if (std::string(app_cmdline) != ref_app_cmdline) {
+        return false;
+    }
+
+    free(argv);
+    free(str_to_be_freed);
+    //printf("/*-------------------------------------*/\n");
+    return true;
+}
+
+#define STRINGIZE(x) STRINGIZE2(x)
+#define STRINGIZE2(x) #x
+#define LINE_STRING STRINGIZE(__LINE__)
+
+void all_test_loader_parse_cmdline() {
+    // empty
+    report(test_loader_parse_cmdline("", {}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline(" ", {}, " "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("  ", {}, "  "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    //
+    report(test_loader_parse_cmdline("-", {"-"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--", {"--"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("-- ", {"--"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--  ", {"--"}, " "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--   ", {"--"}, "  "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+
+    report(test_loader_parse_cmdline("aa", {}, "aa"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline(" aa", {}, " aa"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("aa ", {}, "aa " ),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline(" aa ", {}, " aa "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("  aa  ", {}, "  aa  "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    //
+    report(test_loader_parse_cmdline("--aa", {"--aa"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline(" --aa", {"--aa"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa ", {"--aa"}, "" ),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline(" --aa ", {"--aa"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("  --aa  ", {"--aa"}, " "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+
+    report(test_loader_parse_cmdline("aa bb", {}, "aa bb"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("aa  bb", {}, "aa  bb"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("aa   bb", {}, "aa   bb"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("aa    bb", {}, "aa    bb"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    //
+    report(test_loader_parse_cmdline("--aa --bb", {"--aa", "--bb"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa  --bb", {"--aa", "--bb"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa   --bb", {"--aa", "--bb"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa    --bb", {"--aa", "--bb"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+
+    report(test_loader_parse_cmdline("aa bb ", {}, "aa bb "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("aa bb  ", {}, "aa bb  "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("aa bb   ", {}, "aa bb   "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    //
+    report(test_loader_parse_cmdline("--aa --bb ", {"--aa", "--bb"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa --bb  ", {"--aa", "--bb"}, " "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa --bb   ", {"--aa", "--bb"}, "  "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+
+    report(test_loader_parse_cmdline(" aa bb", {}, " aa bb"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("  aa bb", {}, "  aa bb"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("   aa bb", {}, "   aa bb"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    //
+    report(test_loader_parse_cmdline(" --aa --bb", {"--aa", "--bb"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("  --aa --bb", {"--aa", "--bb"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("   --aa --bb", {"--aa", "--bb"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+
+    report(test_loader_parse_cmdline(" aa bb ", {}, " aa bb "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("  aa bb  ", {}, "  aa bb  "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("   aa bb   ", {}, "   aa bb   "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    //
+    report(test_loader_parse_cmdline(" --aa --bb ", {"--aa", "--bb"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("  --aa --bb  ", {"--aa", "--bb"}, " "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("   --aa --bb   ", {"--aa", "--bb"}, "  
"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+
+    report(test_loader_parse_cmdline("--aa --bb cc", {"--aa", "--bb"}, "cc"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline(" --aa --bb cc", {"--aa", "--bb"}, "cc"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("  --aa --bb  cc", {"--aa", "--bb"}, " 
cc"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("   --aa --bb   cc", {"--aa", "--bb"}, "  
cc"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+
+    report(test_loader_parse_cmdline("aa \"bb\" cc", {}, "aa \"bb\" cc"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("aa bb\\ cc dd", {}, "aa bb\\ cc dd"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("aa bb\\ \\ cc dd", {}, "aa bb\\ \\ cc 
dd"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    //
+    report(test_loader_parse_cmdline("--aa --\"bb\" --cc", {"--aa", 
"--\"bb\"", "--cc"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa \"bb\" --cc", {"--aa"}, "\"bb\" 
--cc"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa --bb\\ \\ cc --dd", {"--aa", "--bb  
cc", "--dd"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa --bb\\ \\ --cc --dd", {"--aa", 
"--bb  --cc", "--dd"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa --bb\\ cc --dd", {"--aa", "--bb 
cc", "--dd"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--aa --bb\\ --cc --dd", {"--aa", "--bb 
--cc", "--dd"}, ""),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+
+    // and realistic/valid OSv cmdline example
+    report(test_loader_parse_cmdline("--env=AA=aa  --env=BB=bb1\\ bb2   
--env=CC=cc1\\ \\ cc2\\ cc3 prog arg1 \"arg2a arg2b\" arg3",
+        {"--env=AA=aa", "--env=BB=bb1 bb2", "--env=CC=cc1  cc2 cc3"}, "prog 
arg1 \"arg2a arg2b\" arg3"),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+    report(test_loader_parse_cmdline("--env=AA=aa  --env=BB=bb1\\ bb2   
--env=CC=cc1\\ \\ cc2\\ cc3   prog  arg1 \"arg2a  arg2b\"  arg3  ",
+        {"--env=AA=aa", "--env=BB=bb1 bb2", "--env=CC=cc1  cc2 cc3"}, "  prog  
arg1 \"arg2a  arg2b\"  arg3  "),
+        "TEST=loader_parse_cmdline:LINE=" LINE_STRING);
+}
+
 int main(int argc, char *argv[])
 {
+    all_test_loader_parse_cmdline();
+
     report(test_parse_simplest(), "simplest command line");
     report(test_parse_simplest_with_args(), "simplest command line with args");
     report(test_parse_simplest_with_quotes(),
-- 
2.9.4

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to