loader_parse_cmdline accepts an input string, str, holding the OSv command line, for example: --env=AA=aa --env=BB=bb1\ bb2 app.so arg1 arg2
The loader options are parsed and saved into argv, up to the first non-loader-option token. argc is set to the number of loader options. app_cmdline is set to the unconsumed part of the input str. Note: quoting loader options that contain spaces with an actual double-quote or single-quote character is not supported. Instead, the backslash character is used to escape spaces in loader options. The lack of support for single/double-quote quoting is the reason the command line beyond the loader options is left unmodified, so that it can be parsed later by code that supports quoting. Signed-off-by: Justin Cinkelj <justin.cink...@xlab.si> --- core/commands.cc | 120 +++++++++++++++++++++++++++ include/osv/commands.hh | 1 + tests/tst-commands.cc | 211 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 332 insertions(+) diff --git a/core/commands.cc b/core/commands.cc index 6287d76..03777f5 100644 --- a/core/commands.cc +++ b/core/commands.cc @@ -318,6 +318,126 @@ std::string getcmdline() return std::string(osv_cmdline); } +#define MY_DEBUG(args...) if(0) printf(args) +/* +loader_parse_cmdline accepts input str - OSv commandline, say: +--env=AA=aa --env=BB=bb1\ bb2 app.so arg1 arg2 + +The loader options are parsed and saved into argv, up to first not-loader-option +token. argc is set to number of loader options. +app_cmdline is set to unconsumed part of input str. +Example output: +argc = 2 +argv[0] = "--env=AA=aa" +argv[1] = "--env=BB=bb1 bb2" +argv[2] = NULL +app_cmdline = "app.so arg1 arg2" + +The whitespaces can be escaped with '\' to allow options with spaces. +Notes: + - _quoting_ loader options with space is not supported. + - input str is modified. + - output argv has to be free-d by caller. + - the strings pointed to by output argv and app_cmdline are in same memory as + original input str. The caller is not permitted to modify or free data at str + after the call to loader_parse_cmdline, as that would corrupt returned + results in argv and app_cmdline. 
+ +Note that std::string is intentionly not used, as it is not fully functional when +called early during boot. +*/ +void loader_parse_cmdline(char* str, int *pargc, char*** pargv, char** app_cmdline) { + *pargv = nullptr; + *pargc = 0; + *app_cmdline = nullptr; + + const char *delim = " \t\n"; + char esc = '\\'; + + // parse string + char *ap; + char *ap0=nullptr, *apE=nullptr; // first and last token. + int ntoken = 0; + ap0 = nullptr; + while(1) { + // Did we already consume all loader options? + // Look at first non-space char - if =='-', than this is loader option. + // Otherwise, it is application command. + char *ch = str; + while (ch && *ch != '\0') { + if (strchr(delim, *ch)) { + ch++; + continue; + } + else if (*ch == '-') { + // this is a loader option, continue with loader parsing + break; + } + else { + // ch is not space or '-', it is start of application command + // Save current position and stop loader parsing. + *app_cmdline = str; + break; + } + } + if (*ch == '\0') { + // empty str, contains only spaces + *app_cmdline = str; + } + if (*app_cmdline) { + break; + } + // there are loader options, continue with parsing + + ap = stresep(&str, delim, esc); + assert(ap); + + MY_DEBUG(" ap = %p %s, *ap=%d\n", ap, ap, *ap); + if (*ap != '\0') { + // valid token found + ntoken++; + if (ap0 == nullptr) { + ap0 = ap; + } + apE = ap; + } + else { + // Multiple consecutive delimiters found. Stresep will write multiple + // '\0' into str. Squash them into one, so that argv will be 'nice', + // in memory consecutive array of C strings. + if (str) { + MY_DEBUG(" shift str %p '%s' <- %p '%s'\n", str-1, str-1, str, str); + memmove(str-1, str, strlen(str) + 1); + str--; + } + } + if (str == nullptr) { + // end of string, last char was delimiter + *app_cmdline = ap + strlen(ap); // make app_cmdline valid pointer to '\0'. 
+ MY_DEBUG(" make app_cmdline valid pointer to '\\0' ap=%p '%s', app_cmdline=%p '%s'\n", + ap, ap, app_cmdline, app_cmdline); + break; + } + + } + MY_DEBUG(" ap0 = %p '%s', apE = %p '%s', ntoken = %d, app_cmdline=%p '%s'\n", + ap0, ap0, apE, apE, ntoken, *app_cmdline, *app_cmdline); + *pargv = (char**)malloc(sizeof(char*) * (ntoken+1)); + // str was modified, tokes are separated by exactly one '\0' + int ii; + for(ap = ap0, ii = 0; ii < ntoken; ap += strlen(ap)+1, ii++) { + assert(ap != nullptr); + assert(*ap != '\0'); + MY_DEBUG(" argv[%d] = %p %s\n", ii, ap, ap); + (*pargv)[ii] = ap; + } + MY_DEBUG(" ntoken = %d, ii = %d\n", ntoken, ii); + assert(ii == ntoken); + (*pargv)[ii] = nullptr; + *pargc = ntoken; +} +#undef MY_DEBUG + int parse_cmdline(const char *p) { char* save; diff --git a/include/osv/commands.hh b/include/osv/commands.hh index be3aca8..0693d28 100644 --- a/include/osv/commands.hh +++ b/include/osv/commands.hh @@ -25,6 +25,7 @@ parse_command_line(const std::string line, bool &ok); std::string getcmdline(); int parse_cmdline(const char *p); void save_cmdline(std::string newcmd); +void loader_parse_cmdline(char* str, int *pargc, char*** pargv, char** app_cmdline); } #endif // !__OSV_COMMANDS_HH__ diff --git a/tests/tst-commands.cc b/tests/tst-commands.cc index ea0cc07..78f4155 100644 --- a/tests/tst-commands.cc +++ b/tests/tst-commands.cc @@ -11,6 +11,7 @@ #include <osv/commands.hh> #include <fstream> #include <map> +#include <string.h> static int tests = 0, fails = 0; @@ -875,8 +876,218 @@ static bool test_runscript_with_conditional_env_in_script(bool set_env_vars_befo return true; } +bool test_loader_parse_cmdline(const char* instr, std::vector<std::string> ref_argv, const char* ref_app_cmdline) { + char *str, *str_to_be_freed; + // strdup alternative code might catch read beyond end of str string. 
+ // The strdup above might contains \0 beyond terminating '\0', so say + // strlen(str+strlen(str) + 1) still returns 0, or some random number + // instead of scanning random garbage. +#if 0 + *str = strdup(instr); + str_to_be_freed = str; +#else + int str_length = std::max(strlen(instr), 1024ul); + str = (char*)malloc(str_length*9); + str_to_be_freed = str; + memset(str, 'X', str_length*9); + str += str_length*4; + strcpy(str, instr); +#endif + + int argc; + char** argv; + char *app_cmdline; + + //printf("/*-------------------------------------*/\n"); + //printf("str = %p '%s'\n", str, str); + osv::loader_parse_cmdline(str, &argc, &argv, &app_cmdline); + + // print and check result + char **ch; + int ii; + int old_len = 0; + if (argc != (int)ref_argv.size()) { + return false; + } + for (ii = 0, ch = argv; ch != nullptr && *ch != nullptr; ii++, ch++) { + //printf(" argv[%d] = %p '%s', expected = '%s'\n", ii, *ch, *ch, ref_argv[ii].c_str()); + if (ref_argv[ii] != argv[ii]) { + return false; + } + if(ii>0) { + // check that argv strings are consecutive in memory + // not really needed for loader options, but common implementation + // detail for Linux app main(argc, argv). + if ((argv[ii-1] + old_len) != argv[ii]) { + return false; + } + } + old_len = strlen(argv[ii]) + 1; // num of bytes including terminating null. 
+ } + + //printf(" ii = %d, ref_argv.size()=%d\n", ii, (int)ref_argv.size()); + if (ii != argc) { + return false; + } + //printf(" app_cmdline=%p '%s', expected = '%s'\n", app_cmdline, app_cmdline, ref_app_cmdline); + if (std::string(app_cmdline) != ref_app_cmdline) { + return false; + } + + free(argv); + free(str_to_be_freed); + //printf("/*-------------------------------------*/\n"); + return true; +} + +#define STRINGIZE(x) STRINGIZE2(x) +#define STRINGIZE2(x) #x +#define LINE_STRING STRINGIZE(__LINE__) + +void all_test_loader_parse_cmdline() { + // empty + report(test_loader_parse_cmdline("", {}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" ", {}, " "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" ", {}, " "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + // + report(test_loader_parse_cmdline("-", {"-"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--", {"--"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("-- ", {"--"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("-- ", {"--"}, " "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("-- ", {"--"}, " "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + + report(test_loader_parse_cmdline("aa", {}, "aa"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" aa", {}, " aa"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("aa ", {}, "aa " ), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" aa ", {}, " aa "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" aa ", {}, " aa "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + // + report(test_loader_parse_cmdline("--aa", {"--aa"}, ""), + 
"TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa", {"--aa"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa ", {"--aa"}, "" ), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa ", {"--aa"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa ", {"--aa"}, " "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + + report(test_loader_parse_cmdline("aa bb", {}, "aa bb"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("aa bb", {}, "aa bb"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("aa bb", {}, "aa bb"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("aa bb", {}, "aa bb"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + // + report(test_loader_parse_cmdline("--aa --bb", {"--aa", "--bb"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa --bb", {"--aa", "--bb"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa --bb", {"--aa", "--bb"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa --bb", {"--aa", "--bb"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + + report(test_loader_parse_cmdline("aa bb ", {}, "aa bb "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("aa bb ", {}, "aa bb "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("aa bb ", {}, "aa bb "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + // + report(test_loader_parse_cmdline("--aa --bb ", {"--aa", "--bb"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa --bb ", {"--aa", "--bb"}, " "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + 
report(test_loader_parse_cmdline("--aa --bb ", {"--aa", "--bb"}, " "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + + report(test_loader_parse_cmdline(" aa bb", {}, " aa bb"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" aa bb", {}, " aa bb"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" aa bb", {}, " aa bb"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + // + report(test_loader_parse_cmdline(" --aa --bb", {"--aa", "--bb"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa --bb", {"--aa", "--bb"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa --bb", {"--aa", "--bb"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + + report(test_loader_parse_cmdline(" aa bb ", {}, " aa bb "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" aa bb ", {}, " aa bb "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" aa bb ", {}, " aa bb "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + // + report(test_loader_parse_cmdline(" --aa --bb ", {"--aa", "--bb"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa --bb ", {"--aa", "--bb"}, " "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa --bb ", {"--aa", "--bb"}, " "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + + report(test_loader_parse_cmdline("--aa --bb cc", {"--aa", "--bb"}, "cc"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa --bb cc", {"--aa", "--bb"}, "cc"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa --bb cc", {"--aa", "--bb"}, " cc"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline(" --aa --bb cc", {"--aa", "--bb"}, " cc"), 
+ "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + + report(test_loader_parse_cmdline("aa \"bb\" cc", {}, "aa \"bb\" cc"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("aa bb\\ cc dd", {}, "aa bb\\ cc dd"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("aa bb\\ \\ cc dd", {}, "aa bb\\ \\ cc dd"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + // + report(test_loader_parse_cmdline("--aa --\"bb\" --cc", {"--aa", "--\"bb\"", "--cc"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa \"bb\" --cc", {"--aa"}, "\"bb\" --cc"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa --bb\\ \\ cc --dd", {"--aa", "--bb cc", "--dd"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa --bb\\ \\ --cc --dd", {"--aa", "--bb --cc", "--dd"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa --bb\\ cc --dd", {"--aa", "--bb cc", "--dd"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--aa --bb\\ --cc --dd", {"--aa", "--bb --cc", "--dd"}, ""), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + + // and realistic/valid OSv cmdline example + report(test_loader_parse_cmdline("--env=AA=aa --env=BB=bb1\\ bb2 --env=CC=cc1\\ \\ cc2\\ cc3 prog arg1 \"arg2a arg2b\" arg3", + {"--env=AA=aa", "--env=BB=bb1 bb2", "--env=CC=cc1 cc2 cc3"}, "prog arg1 \"arg2a arg2b\" arg3"), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); + report(test_loader_parse_cmdline("--env=AA=aa --env=BB=bb1\\ bb2 --env=CC=cc1\\ \\ cc2\\ cc3 prog arg1 \"arg2a arg2b\" arg3 ", + {"--env=AA=aa", "--env=BB=bb1 bb2", "--env=CC=cc1 cc2 cc3"}, " prog arg1 \"arg2a arg2b\" arg3 "), + "TEST=loader_parse_cmdline:LINE=" LINE_STRING); +} + int main(int argc, char *argv[]) { + all_test_loader_parse_cmdline(); + report(test_parse_simplest(), 
"simplest command line"); report(test_parse_simplest_with_args(), "simplest command line with args"); report(test_parse_simplest_with_quotes(), -- 2.9.5 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. For more options, visit https://groups.google.com/d/optout.