Ed Hoo has uploaded a new change for review.
https://gerrit.wikimedia.org/r/258753
Change subject: [Cargo] #cargo_query Fix issues with quotes and other parsing
......................................................................
[Cargo] #cargo_query Fix issues with quotes and other parsing
Apologies, the description of the changes will be provided as a separate
comment due to commit issues with git
Change-Id: I56e9bee6237e1bfd66889bf9a63b2b3216ebd2d3
---
M CargoSQLQuery.php
M CargoUtils.php
2 files changed, 158 insertions(+), 118 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Cargo
refs/changes/53/258753/1
diff --git a/CargoSQLQuery.php b/CargoSQLQuery.php
index defc8ba..e1eaf17 100644
--- a/CargoSQLQuery.php
+++ b/CargoSQLQuery.php
@@ -95,19 +95,18 @@
'/\-\-/' => '--',
'/#/' => '#',
);
- // HTML-decode the string - this is necessary if the query
- // contains a call to {{PAGENAME}} and the page name has any
- // special characters, because {{PAGENAME]] unfortunately
- // HTML-encodes the value, which leads to a '#' in the string.
- $decodedWhereStr = html_entity_decode( $whereStr );
foreach ( $whereStrRegexps as $regexp => $displayString ) {
- if ( preg_match( $regexp, $decodedWhereStr ) ) {
+ if ( preg_match( $regexp, $whereStr ) ) {
throw new MWException( "Error in \"where\"
parameter: the string \"$displayString\" cannot be used within #cargo_query." );
}
}
- $simplifiedWhereStr = str_replace( array( '\"', "\'" ), '',
$whereStr );
- $simplifiedWhereStr = preg_replace( '/"[^"]*"/', '',
$simplifiedWhereStr );
- $simplifiedWhereStr = preg_replace( "/'[^']*'/", '',
$simplifiedWhereStr );
+ $noQuotesPattern = '/([\'"]).*?\1/';
+ $noQuotesFieldsStr = preg_replace( $noQuotesPattern, '',
$fieldsStr );
+ $noQuotesWhereStr = preg_replace( $noQuotesPattern, '',
$whereStr );
+ $noQuotesJoinOnStr = preg_replace( $noQuotesPattern, '',
$joinOnStr );
+ $noQuotesGroupByStr = preg_replace( $noQuotesPattern, '',
$groupByStr );
+ $noQuotesHavingStr = preg_replace( $noQuotesPattern, '',
$havingStr );
+ $noQuotesOrderByStr = preg_replace( $noQuotesPattern, '',
$orderByStr );
$regexps = array(
'/\bselect\b/i' => 'SELECT',
@@ -123,22 +122,22 @@
);
foreach ( $regexps as $regexp => $displayString ) {
if ( preg_match( $regexp, $tablesStr ) ||
- preg_match( $regexp, $fieldsStr ) ||
- preg_match( $regexp, $simplifiedWhereStr ) ||
- preg_match( $regexp, $joinOnStr ) ||
- preg_match( $regexp, $groupByStr ) ||
- preg_match( $regexp, $havingStr ) ||
- preg_match( $regexp, $orderByStr ) ||
+ preg_match( $regexp, $noQuotesFieldsStr ) ||
+ preg_match( $regexp, $noQuotesWhereStr ) ||
+ preg_match( $regexp, $noQuotesJoinOnStr ) ||
+ preg_match( $regexp, $noQuotesGroupByStr ) ||
+ preg_match( $regexp, $noQuotesHavingStr ) ||
+ preg_match( $regexp, $noQuotesOrderByStr ) ||
preg_match( $regexp, $limitStr ) ) {
throw new MWException( "Error: the string
\"$displayString\" cannot be used within #cargo_query." );
}
}
- self::getAndValidateSQLFunctions( $simplifiedWhereStr );
- self::getAndValidateSQLFunctions( $joinOnStr );
- self::getAndValidateSQLFunctions( $groupByStr );
- self::getAndValidateSQLFunctions( $havingStr );
- self::getAndValidateSQLFunctions( $orderByStr );
+ self::getAndValidateSQLFunctions( $noQuotesWhereStr );
+ self::getAndValidateSQLFunctions( $noQuotesJoinOnStr );
+ self::getAndValidateSQLFunctions( $noQuotesGroupByStr );
+ self::getAndValidateSQLFunctions( $noQuotesHavingStr );
+ self::getAndValidateSQLFunctions( $noQuotesOrderByStr );
self::getAndValidateSQLFunctions( $limitStr );
}
@@ -340,7 +339,7 @@
global $wgCargoAllowedSQLFunctions;
$sqlFunctionMatches = array();
- $sqlFunctionRegex = '/(\b|\W)(\w*?)\s?\(/';
+ $sqlFunctionRegex = '/(\b|\W)(\w*?)\s*\(/';
preg_match_all( $sqlFunctionRegex, $str, $sqlFunctionMatches );
$sqlFunctions = array_map( 'strtoupper', $sqlFunctionMatches[2]
);
// Throw an error if any of these functions
@@ -376,23 +375,54 @@
$this->mFieldTables = array();
foreach ( $this->mAliasedFieldNames as $alias => $origFieldName
) {
$tableName = null;
- if ( strpos( $origFieldName, '.' ) !== false ) {
- // This could probably be done better with
- // regexps.
- list( $tableName, $fieldName ) = explode( '.',
$origFieldName, 2 );
- } else {
- $fieldName = $origFieldName;
- $tableName = null;
- }
+ $fieldName = null;
$description = new CargoFieldDescription();
+
+ $fieldPattern = '/^([\w$]+)([.]([\w$]+))?$/';
+ $fieldPatternFound = preg_match( $fieldPattern,
$origFieldName, $fieldPatternMatches );
+ $stringPatternFound = false;
+ $hasFunctionCall = false;
+
+ if ( $fieldPatternFound ) {
+ switch ( count( $fieldPatternMatches ) ) {
+ case 2:
+ $fieldName =
$fieldPatternMatches[1];
+ break;
+ case 4:
+ $tableName =
$fieldPatternMatches[1];
+ $fieldName =
$fieldPatternMatches[3];
+ break;
+ }
+ } else {
+ $stringPattern = '/^(([\'"]).*?\2)(.+)?$/';
+ $stringPatternFound = preg_match(
$stringPattern, $origFieldName, $stringPatternMatches );
+ if ( $stringPatternFound ) {
+ // If the count is 3 we have a single
quoted string
+ // If the count is 4 we have stuff
after it
+ $stringPatternFound = count(
$stringPatternMatches ) == 3;
+ }
+
+ if ( ! $stringPatternFound ) {
+ $noQuotesPattern = '/([\'"]).*?\1/';
+ $noQuotesOrigFieldName = preg_replace(
$noQuotesPattern, '', $origFieldName);
+
+ $functionCallPattern = '/\w\s*\(/';
+ $hasFunctionCall = preg_match(
$functionCallPattern, $noQuotesOrigFieldName );
+ }
+ }
+
// If it's a pre-defined field, we probably know its
// type.
if ( $fieldName == '_ID' || $fieldName == '_rowID' ||
$fieldName == '_pageID' ) {
$description->mType = 'Integer';
+ } elseif ( $fieldName == '_pageTitle' ) {
+ // It's a string - do nothing.
} elseif ( $fieldName == '_pageName' ) {
$description->mType = 'Page';
- } elseif ( strpos( $origFieldName, '(' ) !== false ) {
- $sqlFunctions =
self::getAndValidateSQLFunctions( $origFieldName );
+ } elseif ( $stringPatternFound ) {
+ // It's a quoted, literal string - do nothing.
+ } elseif ( $hasFunctionCall ) {
+ $sqlFunctions =
self::getAndValidateSQLFunctions( $noQuotesOrigFieldName );
$firstFunction = $sqlFunctions[0];
if ( in_array( $firstFunction, array( 'COUNT',
'FLOOR', 'CEIL', 'ROUND' ) ) ) {
$description->mType = 'Integer';
@@ -404,8 +434,6 @@
}
// If it's anything else ('CONCAT', 'SUBSTRING',
// etc. etc.), we don't have to do anything.
- } elseif ( preg_match( "/^'.*'$/m", $fieldName ) ) {
- // It's a quoted, literal string - do nothing.
} else {
// It's a standard field - though if it's
// '_value', or ends in '__full', it's actually
@@ -526,11 +554,6 @@
return false;
}
- function whereStringHasPattern( $searchPattern ) {
- $searchPattern = "/(^|\s|\()$searchPattern\s/";
- return preg_match( $searchPattern, $this->mWhereStr, $matches );
- }
-
function handleVirtualFields() {
// The array-field alias can be found in a number of different
// clauses. Handling depends on which clause it is:
@@ -565,61 +588,56 @@
foreach ( $virtualFields as $virtualField ) {
$fieldName = $virtualField['fieldName'];
$tableName = $virtualField['tableName'];
+ $foundLikeMatch1 = $foundMatch1 = $foundLikeMatch2 =
$foundMatch2 = false;
+ $patternSuffix = '\s+(HOLDS)\s+(LIKE)?/i';
+ $throwException = false;
- // Is this field in the "where" string at all? If not,
- // skip it.
- $pattern1 = "$tableName\.$fieldName";
- $foundMatch1 = $this->whereStringHasPattern( $pattern1
);
- if ( !$foundMatch1 ) {
- $pattern2 = $fieldName;
- $foundMatch2 = $this->whereStringHasPattern(
$pattern2 );
- if ( !$foundMatch2 ) {
- continue;
+ $simplePattern1 =
CargoUtils::getSQLTableAndFieldPattern( $tableName, $fieldName );
+ if ( preg_match( $simplePattern1, $this->mWhereStr ) ) {
+ $likePattern1 =
CargoUtils::getSQLTableAndFieldPattern( $tableName, $fieldName, false ).
+ $patternSuffix;
+ if ( preg_match( $likePattern1,
$this->mWhereStr, $matches ) ) {
+ $foundMatch1 = count( $matches ) == 3;
+ $foundLikeMatch1 = count( $matches ) ==
4;
}
+ $throwException = $throwException || (!
$foundMatch1 && ! $foundLikeMatch1);
}
-
- $likePattern1 = "$tableName\.$fieldName\s*HOLDS\s*LIKE";
- $foundLikeMatch1 = $this->whereStringHasPattern(
$likePattern1 );
- $foundLikeMatch2 = $foundMatch1 = $foundMatch2 = false;
- if ( !$foundLikeMatch1 ) {
- $likePattern2 = "$fieldName\s*HOLDS\s*LIKE";
- $foundLikeMatch2 =
$this->whereStringHasPattern( $likePattern2 );
- }
-
- if ( !$foundLikeMatch1 && !$foundLikeMatch2 ) {
- $holdsPattern1 =
"$tableName\.$fieldName\s*HOLDS";
- $foundHoldsMatch1 =
$this->whereStringHasPattern( $holdsPattern1 );
- if ( !$foundHoldsMatch1 ) {
- $holdsPattern2 = "$fieldName\s*HOLDS";
- $foundHoldsMatch2 =
$this->whereStringHasPattern( $holdsPattern2 );
+ $simplePattern2 = CargoUtils::getSQLFieldPattern(
$fieldName );
+ if ( preg_match( $simplePattern2, $this->mWhereStr ) ) {
+ $likePattern2 = CargoUtils::getSQLFieldPattern(
$fieldName, false ).
+ $patternSuffix;
+ if ( preg_match( $likePattern2,
$this->mWhereStr, $matches ) ) {
+ $foundMatch2 = count( $matches ) == 3;
+ $foundLikeMatch2 = count( $matches ) ==
4;
}
+ $throwException = $throwException || (!
$foundMatch2 && ! $foundLikeMatch2);
+ }
+ if ( $throwException ) {
+ throw new MWException( "Error: operator for the
virtual field '" .
+ "$tableName.$fieldName' must be 'HOLDS'
or 'HOLDS LIKE'." );
}
- // If no "HOLDS", throw an error.
- if ( !$foundLikeMatch1 && !$foundLikeMatch2 &&
!$foundHoldsMatch1 && !$foundHoldsMatch2 ) {
- throw new MWException( "Error: operator for the
virtual field '"
- . "$tableName.$fieldName' must be 'HOLDS' or
'HOLDS LIKE'." );
- }
-
- $fieldTableName = $tableName . '__' . $fieldName;
- $this->addFieldTableToTableNames( $fieldTableName,
$tableName );
- $this->mCargoJoinConds[] = array(
- 'joinType' => 'LEFT OUTER JOIN',
- 'table1' => $tableName,
- 'field1' => '_ID',
- 'table2' => $fieldTableName,
- 'field2' => '_rowID'
- );
- if ( $foundLikeMatch1 ) {
- $this->mWhereStr = preg_replace(
"/$likePattern1/", "$fieldTableName._value LIKE",
- $this->mWhereStr );
- } elseif ( $foundLikeMatch2 ) {
- $this->mWhereStr = preg_replace(
"/$likePattern2/", "$fieldTableName._value LIKE",
- $this->mWhereStr );
- } elseif ( $foundHoldsMatch1 ) {
- $this->mWhereStr = preg_replace(
"/$holdsPattern1/", "$fieldTableName._value=", $this->mWhereStr );
- } elseif ( $foundHoldsMatch2 ) {
- $this->mWhereStr = preg_replace(
"/$holdsPattern2/", "$fieldTableName._value=", $this->mWhereStr );
+ if ( $foundLikeMatch1 || $foundLikeMatch2 ||
$foundMatch1 || $foundMatch2 ) {
+ $fieldTableName = $tableName . '__' .
$fieldName;
+ $this->addFieldTableToTableNames(
$fieldTableName, $tableName );
+ $this->mCargoJoinConds[] = array(
+ 'joinType' => 'LEFT OUTER JOIN',
+ 'table1' => $tableName,
+ 'field1' => '_ID',
+ 'table2' => $fieldTableName,
+ 'field2' => '_rowID'
+ );
+ if ( $foundLikeMatch1 ) {
+ $this->mWhereStr = preg_replace(
$likePattern1, "$fieldTableName._value LIKE ",
+ $this->mWhereStr );
+ } elseif ( $foundLikeMatch2 ) {
+ $this->mWhereStr = preg_replace(
$likePattern2, "$fieldTableName._value LIKE ",
+ $this->mWhereStr );
+ } elseif ( $foundMatch1 ) {
+ $this->mWhereStr = preg_replace(
$likePattern1, "$fieldTableName._value=", $this->mWhereStr );
+ } elseif ( $foundMatch2 ) {
+ $this->mWhereStr = preg_replace(
$likePattern2, "$fieldTableName._value=", $this->mWhereStr );
+ }
}
}
@@ -672,9 +690,9 @@
foreach ( $virtualFields as $virtualField ) {
$fieldName = $virtualField['fieldName'];
$tableName = $virtualField['tableName'];
- $pattern1 = "/(^|\s)$tableName\.$fieldName($|\s)/";
+ $pattern1 = CargoUtils::getSQLTableAndFieldPattern(
$tableName, $fieldName );
$foundMatch1 = preg_match( $pattern1,
$this->mGroupByStr, $matches );
- $pattern2 = "/(^|\s)$fieldName($|\s)/";
+ $pattern2 = CargoUtils::getSQLFieldPattern( $fieldName
);
$foundMatch2 = false;
if ( !$foundMatch1 ) {
@@ -746,9 +764,9 @@
foreach ( $virtualFields as $virtualField ) {
$fieldName = $virtualField['fieldName'];
$tableName = $virtualField['tableName'];
- $pattern1 = "/(^|\s)$tableName\.$fieldName($|\s)/";
+ $pattern1 = CargoUtils::getSQLTableAndFieldPattern(
$tableName, $fieldName );
$foundMatch1 = preg_match( $pattern1,
$this->mOrderByStr, $matches );
- $pattern2 = "/(^|\s)$fieldName($|\s)/";
+ $pattern2 = CargoUtils::getSQLFieldPattern( $fieldName
);
$foundMatch2 = false;
if ( !$foundMatch1 ) {
@@ -842,19 +860,21 @@
foreach ( $coordinateFields as $coordinateField ) {
$fieldName = $coordinateField['fieldName'];
$tableName = $coordinateField['tableName'];
- $pattern1 =
"/(^|\s|\()$tableName\.$fieldName(\s*NEAR\s*)\(([^)]*)\)/";
+ $patternSuffix = '(\s+NEAR\s*)\(([^)]*)\)/i';
+
+ $pattern1 = CargoUtils::getSQLTableAndFieldPattern(
$tableName, $fieldName, false ) . $patternSuffix;
$foundMatch1 = preg_match( $pattern1, $this->mWhereStr,
$matches );
if ( !$foundMatch1 ) {
- $pattern2 =
"/(^|\s|\()$fieldName(\s*NEAR\s*)\(([^)]*)\)/";
+ $pattern2 = CargoUtils::getSQLFieldPattern(
$fieldName, false ) . $patternSuffix;
$foundMatch2 = preg_match( $pattern2,
$this->mWhereStr, $matches );
}
if ( $foundMatch1 || $foundMatch2 ) {
// If no "NEAR", throw an error.
- if ( count( $matches ) != 3 ) {
+ if ( count( $matches ) != 4 ) {
throw new MWException( "Error: operator
for the virtual coordinates field "
. "'$tableName.$fieldName' must be
'NEAR'." );
}
- $coordinatesAndDistance = explode( ',',
$matches[2] );
+ $coordinatesAndDistance = explode( ',',
$matches[3] );
if ( count( $coordinatesAndDistance ) != 3 ) {
throw new MWException( "Error: value
for the 'NEAR' operator must be of the form "
. "\"(latitude, longitude,
distance)\"." );
@@ -1034,10 +1054,7 @@
$tableNamePatterns = array();
$tableNameReplacements = array();
foreach ( $this->mTableNames as $tableName ) {
- // Is there a way to do this with just one regexp?
- $tableNamePatterns[] = "/^$tableName\./";
- $tableNamePatterns[] = "/(\W)$tableName\./";
- $tableNameReplacements[] = $cdb->tableName( $tableName
) . ".";
+ $tableNamePatterns[] =
CargoUtils::getSQLTablePattern($tableName);
$tableNameReplacements[] = "$1" . $cdb->tableName(
$tableName ) . ".";
}
diff --git a/CargoUtils.php b/CargoUtils.php
index 8799fa9..f92971c 100644
--- a/CargoUtils.php
+++ b/CargoUtils.php
@@ -188,8 +188,10 @@
}
/**
- * Splits a string by the delimiter, but ignores delimiters contained
- * within parentheses.
+ * Splits a string by the delimiter, but ensures that parenthesis,
separators
+ * and "the other quote" (single quote in a double quoted string or
double
+ * quote in a single quoted string) inside a quoted string are not
considered
+ * lexically.
*/
static function smartSplit( $delimiter, $string ) {
if ( $string == '' ) {
@@ -199,8 +201,6 @@
$ignoreNextChar = false;
$returnValues = array();
$numOpenParentheses = 0;
- $numOpenSingleQuotes = 0;
- $numOpenDoubleQuotes = 0;
$curReturnValue = '';
for ( $i = 0; $i < strlen( $string ); $i++ ) {
@@ -216,26 +216,18 @@
$numOpenParentheses++;
} elseif ( $curChar == ')' ) {
$numOpenParentheses--;
- } elseif ( $curChar == '\'' ) {
- if ( $numOpenSingleQuotes == 0 ) {
- $numOpenSingleQuotes = 1;
- } else {
- $numOpenSingleQuotes = 0;
+ } elseif ( $curChar == '\'' || $curChar == '"' ) {
+ $pos = strpos( $string, $curChar, $i + 1 );
+ if ( $pos === false ) {
+ throw new MWException( "Error:
unmatched quote in SQL string constant." );
}
- } elseif ( $curChar == '"' ) {
- if ( $numOpenDoubleQuotes == 0 ) {
- $numOpenDoubleQuotes = 1;
- } else {
- $numOpenDoubleQuotes = 0;
- }
+ $curReturnValue .= substr( $string, $i, $pos -
$i );
+ $i = $pos;
} elseif ( $curChar == '\\' ) {
$ignoreNextChar = true;
}
- if ( $curChar == $delimiter &&
- $numOpenParentheses == 0 &&
- $numOpenSingleQuotes == 0 &&
- $numOpenDoubleQuotes == 0 ) {
+ if ( $curChar == $delimiter && $numOpenParentheses == 0
) {
$returnValues[] = trim( $curReturnValue );
$curReturnValue = '';
} else {
@@ -248,6 +240,37 @@
}
/**
+ * Generates a Regular Expression to match $fieldName in a SQL string.
+ * Allows for $ as valid identifier character.
+ */
+ public static function getSQLFieldPattern( $fieldName, $closePattern =
true ) {
+ $fieldName = str_replace( '$', '\$', $fieldName );
+ $pattern = '/([^\w$.]|^)'.$fieldName;
+ return $pattern . ( $closePattern ? '([^\w$]|$)/i' : '' );
+ }
+
+ /**
+ * Generates a Regular Expression to match $tableName.$fieldName in a
+ * SQL string. Allows for $ as valid identifier character.
+ */
+ public static function getSQLTableAndFieldPattern( $tableName,
$fieldName, $closePattern = true ) {
+ $fieldName = str_replace( '$', '\$', $fieldName );
+ $tableName = str_replace( '$', '\$', $tableName );
+ $pattern = '/([^\w$]|^)'.$tableName.'\.'.$fieldName;
+ return $pattern . ( $closePattern ? '([^\w$]|$)/i' : '' );
+ }
+
+ /**
+ * Generates a Regular Expression to match $tableName in a SQL string.
+ * Allows for $ as valid identifier character.
+ */
+ public static function getSQLTablePattern( $tableName, $closePattern =
true ) {
+ $tableName = str_replace( '$', '\$', $tableName );
+ $pattern = '/([^\w$]|^)'.$tableName.'\.';
+ return $pattern . ( $closePattern ? '/i' : '' );
+ }
+
+ /**
* Parse a piece of wikitext differently depending on whether
* we're in a special or regular page.
*
--
To view, visit https://gerrit.wikimedia.org/r/258753
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I56e9bee6237e1bfd66889bf9a63b2b3216ebd2d3
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Cargo
Gerrit-Branch: master
Gerrit-Owner: Ed Hoo <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits