Example debug output:
./scripts/run.py -e "blah aa bb \"asdf tt\" 'a \\\"b c\\\" d' "
OSv v0.24-431-g67ecd81
DBG av[0] = 'blah'
DBG av[1] = 'aa'
DBG av[2] = 'bb'
DBG av[3] = 'asdf tt'
DBG av[4] = 'a "b c" d'
DBG av[5] = ''
eth0: 192.168.122.15
Failed to load object: blah. Powering off.

I forgot to change "#if 1" to "#if 0" in debug output.

Justin

On 08/21/2017 06:01 PM, Justin Cinkelj wrote:
The strtok_r doesn't allow escaping the input parameters, which means
each space is considered a separator. The boost::tokenizer supports
escaping. This commit replaces strtok_r with boost::tokenizer. The
final __argv is still contiguous in memory, as expected by some programs.

Spaces can be quoted with " or with ', and " or ' can be escaped with \.

(partially) fixes #892

Signed-off-by: Justin Cinkelj <[email protected]>
---
  core/commands.cc | 52 ++++++++++++++++++++++++++++++++++++++++++----------
  1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/core/commands.cc b/core/commands.cc
index 6287d76..86b25c2 100644
--- a/core/commands.cc
+++ b/core/commands.cc
@@ -19,6 +19,8 @@
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>
+#include <string>
+#include <boost/tokenizer.hpp>
namespace qi = boost::spirit::qi;
  namespace ascii = boost::spirit::ascii;
@@ -320,13 +322,8 @@ std::string getcmdline()
int parse_cmdline(const char *p)
  {
-    char* save;
-
      if (args.size()) {
-        // From the strtok manpage, we see that: "The first call to strtok()
-        // sets this pointer to point to the first byte of the string." It
-        // follows from this that the first argument contains the address we
-        // should use to free the memory allocated for the string
+        // free data returned by malloc, and stored into args_data
          free(args[0]);
      }
@@ -336,16 +333,51 @@ int parse_cmdline(const char *p)
      }
      osv_cmdline = strdup(p);
- char* cmdline = strdup(p);
+    std::string escSep("\\"); // escape character
+    std::string delim(" \t\n"); // split on spaces, tab, newline
+    std::string quote("\"'"); // allow quoted arguments
+    boost::escaped_list_separator<char> esc(escSep, delim, quote);
+    typedef boost::tokenizer<boost::escaped_list_separator<char>> tokenizer;
+
+    // Split cmdline into tokens.
+    std::string cmdline = p;
+    tokenizer token{cmdline, esc};
+    // args2 has the same content as __argv, but strings in the latter are also
+    // consecutive in memory.
+    std::vector<char*> args2;
+    for (const auto &tt : token) {
+        // strdup will create a copy of the temporary variable tt.
+        args2.push_back(strdup(tt.c_str()));
+    }
- while ((p = strtok_r(cmdline, " \t\n", &save)) != nullptr) {
-        args.push_back(const_cast<char *>(p));
-        cmdline = nullptr;
+    // Copy content from args2 to one large array args_data - a string with multiple '\0' inside.
+    // args then contains pointers into args_data.
+    size_t sz = 0;
+    for (const auto arg: args2) {
+        sz += strlen(arg) + 1;
+    }
+    char* args_data = (char*)malloc(sz);
+    size_t pos = 0;
+    for (const auto arg: args2) {
+        memcpy(args_data + pos, arg, strlen(arg) + 1);
+        args.push_back(args_data + pos);
+        pos += strlen(arg) + 1;
+        free(arg);
      }
+    assert(pos == sz);
      args.push_back(nullptr);
+
      __argv = args.data();
      __argc = args.size() - 1;
+#if 1
+    int ii = 0;
+    char** ch;
+    for (ch = __argv; *ch != nullptr; ch++, ii++) {
+        printf("DBG av[%d] = '%s'\n", ii, *ch);
+    }
+#endif
+
      return 0;
  }

--
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to