Hi all, I am posting an alternate patch. This patch: 1) If token starts from ( do not consider it as function parameters start point and return a token from ( to the next white space
2) If configuration_includes_quoted_values is set to "on" (new style enabled) and token ends on a ( character, consider it as function name. If the token is "parameters" return FunctionParameters type else return FunctionNameUnknown type and parsing fails 3) If configuration_includes_quoted_values is set to "off" (new style enabled) and token ends on a (, if the token is "parameters" return FunctionParameters type, else do not end the token on '(' but to the next whitespace character. For example for the string "test(test) more" will return as token the "test(test)" For the new style if someone want to use '(' character on a regex for example, he should use quotes: 'test(.*/)' "test(.*/)" If someone does not want interpret macros he should use single quotes: 'test(.*)\/$' Is it OK? On 07/29/2013 08:25 PM, Amos Jeffries wrote: > Looks like nobody actually ran the trivial "-k parse" test using 3.HEAD > or the initial quoted strings code. > > Attached is a patch which adds a secondary form of undo for handling > incorrectly identified ConfigParser::FunctionNameToken elements. Instead > of aborting Squid on any non-function token containing a '(' we take the > wrongly identified assumed function name element and reform it with the > '(' delimiter and trailing element. The resulting aggregate token which > should have been identified initially is then sent to the upper parser > layer. > > Amos >
=== modified file 'src/ConfigParser.cc' --- src/ConfigParser.cc 2013-07-22 01:26:09 +0000 +++ src/ConfigParser.cc 2013-07-30 14:47:59 +0000 @@ -37,57 +37,57 @@ #include "Debug.h" #include "fatal.h" #include "globals.h" int ConfigParser::RecognizeQuotedValues = true; std::stack<ConfigParser::CfgFile *> ConfigParser::CfgFiles; ConfigParser::TokenType ConfigParser::LastTokenType = ConfigParser::SimpleToken; char *ConfigParser::LastToken = NULL; char *ConfigParser::CfgLine = NULL; char *ConfigParser::CfgPos = NULL; std::queue<std::string> ConfigParser::Undo_; bool ConfigParser::AllowMacros_ = false; void ConfigParser::destruct() { shutting_down = 1; if (!CfgFiles.empty()) { std::ostringstream message; CfgFile *f = CfgFiles.top(); - message << "Bungled " << f->filePath << " line " << f->lineNo << + message << "Bungled (#1)" << f->filePath << " line " << f->lineNo << ": " << f->currentLine << std::endl; CfgFiles.pop(); delete f; while (!CfgFiles.empty()) { f = CfgFiles.top(); message << " included from " << f->filePath << " line " << f->lineNo << ": " << f->currentLine << std::endl; CfgFiles.pop(); delete f; } message << " included from " << cfg_filename << " line " << config_lineno << ": " << config_input_line << std::endl; std::string msg = message.str(); fatalf("%s", msg.c_str()); } else - fatalf("Bungled %s line %d: %s", + fatalf("Bungled (#2) %s line %d: %s", cfg_filename, config_lineno, config_input_line); } void ConfigParser::TokenUndo() { assert(LastToken); Undo_.push(LastToken); } void ConfigParser::TokenPutBack(const char *tok) { assert(tok); Undo_.push(tok); } char * ConfigParser::Undo() { @@ -213,185 +213,199 @@ char * ConfigParser::TokenParse(char * &nextToken, ConfigParser::TokenType &type, bool legacy) { if (!nextToken || *nextToken == '\0') return NULL; type = ConfigParser::SimpleToken; nextToken += strspn(nextToken, w_space); if (*nextToken == '"' || *nextToken == '\'') { type = ConfigParser::QuotedToken; char *token = UnQuote(nextToken, &nextToken); *nextToken = '\0'; ++nextToken; return token; } char *token = nextToken; if (char *t = strchr(nextToken, '#')) *t = '\0'; const char *sep; - if (legacy) + if (legacy || *nextToken == '(') sep = w_space; else sep = w_space "("; nextToken += strcspn(nextToken, sep); - if (!legacy && *nextToken == '(') - type = ConfigParser::FunctionNameToken; - else + if (!legacy && *nextToken == '(') { + if (strncmp(token, "parameters", nextToken - token) == 0) + type = ConfigParser::FunctionParameters; + else if (ConfigParser::RecognizeQuotedValues) + type = ConfigParser::FunctionNameUnknown; + else { + // For old style parsing we do not want to be strict and + // allow tokens with function like syntax + type = ConfigParser::SimpleToken; + nextToken += strcspn(nextToken, w_space); + } + } else type = ConfigParser::SimpleToken; if (*nextToken != '\0') { *nextToken = '\0'; ++nextToken; } if (*token == '\0') return NULL; return token; } char * ConfigParser::NextElement(ConfigParser::TokenType &type, bool legacy) { char *token = TokenParse(CfgPos, type, legacy); return token; } char * ConfigParser::NextToken() { - if ((LastToken = ConfigParser::Undo())) + if ((LastToken = ConfigParser::Undo())) { + debugs(3, 6, "TOKEN (undone): " << LastToken); return LastToken; + } char *token = NULL; do { while (token == NULL && !CfgFiles.empty()) { ConfigParser::CfgFile *wordfile = CfgFiles.top(); token = wordfile->parse(LastTokenType); if (!token) { assert(!wordfile->isOpen()); CfgFiles.pop(); + debugs(3, 4, "CfgFiles.pop " << wordfile->filePath); delete wordfile; } } if (!token) token = NextElement(LastTokenType); - if (token && LastTokenType == ConfigParser::FunctionNameToken && strcmp("parameters", token) == 0) { + if (token && LastTokenType == ConfigParser::FunctionParameters) { char *path = NextToken(); if (LastTokenType != ConfigParser::QuotedToken) { debugs(3, DBG_CRITICAL, "Quoted filename missing: " << token); self_destruct(); return NULL; } // The next token in current cfg file line must be a ")" char *end = NextToken(); if (LastTokenType != ConfigParser::SimpleToken || strcmp(end, ")") != 0) { debugs(3, DBG_CRITICAL, "missing ')' after " << token << "(\"" << path << "\""); self_destruct(); return NULL; } if (CfgFiles.size() > 16) { debugs(3, DBG_CRITICAL, "WARNING: can't open %s for reading parameters: includes are nested too deeply (>16)!\n" << path); self_destruct(); return NULL; } ConfigParser::CfgFile *wordfile = new ConfigParser::CfgFile(); if (!path || !wordfile->startParse(path)) { debugs(3, DBG_CRITICAL, "Error opening config file: " << token); delete wordfile; self_destruct(); return NULL; } CfgFiles.push(wordfile); token = NULL; - } else if (token && LastTokenType == ConfigParser::FunctionNameToken) { + } else if (token && LastTokenType == ConfigParser::FunctionNameUnknown) { debugs(3, DBG_CRITICAL, "Unknown cfg function: " << token); self_destruct(); return NULL; } } while (token == NULL && !CfgFiles.empty()); return (LastToken = token); } char * ConfigParser::NextQuotedOrToEol() { char *token; if ((token = CfgPos) == NULL) { debugs(3, DBG_CRITICAL, "token is missing"); self_destruct(); return NULL; } token += strspn(token, w_space); if (*token == '\"' || *token == '\'') { //TODO: eat the spaces at the end and check if it is untill the end of file. char *end; token = UnQuote(token, &end); *end = '\0'; CfgPos = end + 1; LastTokenType = ConfigParser::QuotedToken; } else LastTokenType = ConfigParser::SimpleToken; CfgPos = NULL; + debugs(3, 6, "TOKEN (to eol): " << token); return (LastToken = token); } const char * ConfigParser::QuoteString(const String &var) { static String quotedStr; const char *s = var.termedBuf(); bool needQuote = false; for (const char *l = s; !needQuote && *l != '\0'; ++l ) needQuote = !isalnum(*l); if (!needQuote) return s; quotedStr.clean(); quotedStr.append('"'); for (; *s != '\0'; ++s) { if (*s == '"' || *s == '\\') quotedStr.append('\\'); quotedStr.append(*s); } quotedStr.append('"'); return quotedStr.termedBuf(); } bool ConfigParser::CfgFile::startParse(char *path) { assert(wordFile == NULL); + debugs(3, 3, "Parsing from " << path); if ((wordFile = fopen(path, "r")) == NULL) { debugs(3, DBG_CRITICAL, "file :" << path << " not found"); return false; } #if _SQUID_WINDOWS_ setmode(fileno(wordFile), O_TEXT); #endif filePath = path; return getFileLine(); } bool ConfigParser::CfgFile::getFileLine() { // Else get the next line if (fgets(parseBuffer, CONFIG_LINE_LIMIT, wordFile) == NULL) { /* stop reading from file */ fclose(wordFile); === modified file 'src/ConfigParser.h' --- src/ConfigParser.h 2013-07-21 19:24:35 +0000 +++ src/ConfigParser.h 2013-07-30 14:34:17 +0000 @@ -53,41 +53,41 @@ #define CONFIG_LINE_LIMIT 2048 /** * A configuration file Parser. Instances of this class track * parsing state and perform tokenisation. Syntax is currently * taken care of outside this class. * * One reason for this class is to allow testing of configuration * using modules without linking cache_cf.o in - because that drags * in all of squid by reference. Instead the tokeniser only is * brought in. */ class ConfigParser { public: /** * Parsed tokens type: simple tokens, quoted tokens or function * like parameters. */ - enum TokenType {SimpleToken, QuotedToken, FunctionNameToken}; + enum TokenType {SimpleToken, QuotedToken, FunctionParameters, FunctionNameUnknown}; void destruct(); static void ParseUShort(unsigned short *var); static void ParseBool(bool *var); static const char *QuoteString(const String &var); static void ParseWordList(wordlist **list); /** * Backward compatibility wrapper for the ConfigParser::NextToken method. * If the configuration_includes_quoted_values configuration parameter is * set to 'off' this interprets the quoted tokens as filenames. */ static char * strtokFile(); /** * Returns the body of the next element. The element is either a token or * a quoted string with optional escape sequences and/or macros. The body * of a quoted string element does not include quotes or escape sequences. * Future code will want to see Elements and not just their bodies. */