strtok_r doesn't allow escaping within the input parameters, which means every space is treated as a separator. boost::tokenizer supports escaping, so this commit replaces strtok_r with boost::tokenizer. The strings referenced by the final __argv are still contiguous in memory, as expected by some programs.
Spaces can be quoted with " or with ', and " or ' can be escaped with \. (partially) fixes #892 Signed-off-by: Justin Cinkelj <[email protected]> --- core/commands.cc | 52 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/core/commands.cc b/core/commands.cc index 6287d76..86b25c2 100644 --- a/core/commands.cc +++ b/core/commands.cc @@ -19,6 +19,8 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <string> +#include <boost/tokenizer.hpp> namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; @@ -320,13 +322,8 @@ std::string getcmdline() int parse_cmdline(const char *p) { - char* save; - if (args.size()) { - // From the strtok manpage, we see that: "The first call to strtok() - // sets this pointer to point to the first byte of the string." It - // follows from this that the first argument contains the address we - // should use to free the memory allocated for the string + // free date returend by malloc, and stored into args_data free(args[0]); } @@ -336,16 +333,51 @@ int parse_cmdline(const char *p) } osv_cmdline = strdup(p); - char* cmdline = strdup(p); + std::string escSep("\\"); // escape character + std::string delim(" \t\n"); // split on spaces, tab, newline + std::string quote("\"'"); // allow quoted arguments + boost::escaped_list_separator<char> esc(escSep, delim, quote); + typedef boost::tokenizer<boost::escaped_list_separator<char>> tokenizer; + + // Split cmdline into tokens. + std::string cmdline = p; + tokenizer token{cmdline, esc}; + // args2 has same content as __argv, but strings in the later are also + // consecutive in memory. + std::vector<char*> args2; + for (const auto &tt : token) { + // strdup will create copy of temporal variable tt. 
+ args2.push_back(strdup(tt.c_str())); + } - while ((p = strtok_r(cmdline, " \t\n", &save)) != nullptr) { - args.push_back(const_cast<char *>(p)); - cmdline = nullptr; + // Copy content from args2 to one large array args_data - string with multiple '\0' inside. + // args than contains pointers into args_data. + size_t sz = 0; + for (const auto arg: args2) { + sz += strlen(arg) + 1; + } + char* args_data = (char*)malloc(sz); + size_t pos = 0; + for (const auto arg: args2) { + memcpy(args_data + pos, arg, strlen(arg) + 1); + args.push_back(args_data + pos); + pos += strlen(arg) + 1; + free(arg); } + assert(pos == sz); args.push_back(nullptr); + __argv = args.data(); __argc = args.size() - 1; +#if 1 + int ii = 0; + char** ch; + for (ch = __argv; *ch != nullptr; ch++, ii++) { + printf("DBG av[%d] = '%s'\n", ii, *ch); + } +#endif + return 0; } -- 2.9.4 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. For more options, visit https://groups.google.com/d/optout.
