strtok_r doesn't allow escaping in the input parameters, which means
every space is treated as a separator. boost::tokenizer supports
escaping. This commit replaces strtok_r with boost::tokenizer. The
final __argv is still contiguous in memory, as expected by some programs.

Spaces can be quoted with " or with ', and " or ' can be escaped with \.

(partially) fixes #892

Signed-off-by: Justin Cinkelj <[email protected]>
---
 core/commands.cc | 52 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/core/commands.cc b/core/commands.cc
index 6287d76..86b25c2 100644
--- a/core/commands.cc
+++ b/core/commands.cc
@@ -19,6 +19,8 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <string>
+#include <boost/tokenizer.hpp>
 
 namespace qi = boost::spirit::qi;
 namespace ascii = boost::spirit::ascii;
@@ -320,13 +322,8 @@ std::string getcmdline()
 
 int parse_cmdline(const char *p)
 {
-    char* save;
-
     if (args.size()) {
-        // From the strtok manpage, we see that: "The first call to strtok()
-        // sets this pointer to point to the first byte of the string." It
-        // follows from this that the first argument contains the address we
-        // should use to free the memory allocated for the string
+        // free data returned by malloc and stored in args_data
         free(args[0]);
     }
 
@@ -336,16 +333,51 @@ int parse_cmdline(const char *p)
     }
     osv_cmdline = strdup(p);
 
-    char* cmdline = strdup(p);
+    std::string escSep("\\"); // escape character
+    std::string delim(" \t\n"); // split on spaces, tab, newline
+    std::string quote("\"'"); // allow quoted arguments
+    boost::escaped_list_separator<char> esc(escSep, delim, quote);
+    typedef boost::tokenizer<boost::escaped_list_separator<char>> tokenizer;
+
+    // Split cmdline into tokens.
+    std::string cmdline = p;
+    tokenizer token{cmdline, esc};
+    // args2 has the same content as __argv, but strings in the latter are also
+    // consecutive in memory.
+    std::vector<char*> args2;
+    for (const auto &tt : token) {
+        // strdup will create copy of temporal variable tt.
+        args2.push_back(strdup(tt.c_str()));
+    }
 
-    while ((p = strtok_r(cmdline, " \t\n", &save)) != nullptr) {
-        args.push_back(const_cast<char *>(p));
-        cmdline = nullptr;
+    // Copy content from args2 to one large array args_data - string with multiple '\0' inside.
+    // args then contains pointers into args_data.
+    size_t sz = 0;
+    for (const auto arg: args2) {
+        sz += strlen(arg) + 1;
+    }
+    char* args_data = (char*)malloc(sz);
+    size_t pos = 0;
+    for (const auto arg: args2) {
+        memcpy(args_data + pos, arg, strlen(arg) + 1);
+        args.push_back(args_data + pos);
+        pos += strlen(arg) + 1;
+        free(arg);
     }
+    assert(pos == sz);
     args.push_back(nullptr);
+
     __argv = args.data();
     __argc = args.size() - 1;
 
+#if 1
+    int ii = 0;
+    char** ch;
+    for (ch = __argv; *ch != nullptr; ch++, ii++) {
+        printf("DBG av[%d] = '%s'\n", ii, *ch);
+    }
+#endif
+
     return 0;
 }
 
-- 
2.9.4

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to