Ed Hoo has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/258753

Change subject: [Cargo] #cargo_query Fix issues with quotes and other parsing
......................................................................

[Cargo] #cargo_query Fix issues with quotes and other parsing

Apologies, the description of the changes will be provided as a separate 
comment due to commit issues with git

Change-Id: I56e9bee6237e1bfd66889bf9a63b2b3216ebd2d3
---
M CargoSQLQuery.php
M CargoUtils.php
2 files changed, 158 insertions(+), 118 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Cargo 
refs/changes/53/258753/1

diff --git a/CargoSQLQuery.php b/CargoSQLQuery.php
index defc8ba..e1eaf17 100644
--- a/CargoSQLQuery.php
+++ b/CargoSQLQuery.php
@@ -95,19 +95,18 @@
                        '/\-\-/' => '--',
                        '/#/' => '#',
                );
-               // HTML-decode the string - this is necessary if the query
-               // contains a call to {{PAGENAME}} and the page name has any
-               // special characters, because {{PAGENAME]] unfortunately
-               // HTML-encodes the value, which leads to a '#' in the string.
-               $decodedWhereStr = html_entity_decode( $whereStr );
                foreach ( $whereStrRegexps as $regexp => $displayString ) {
-                       if ( preg_match( $regexp, $decodedWhereStr ) ) {
+                       if ( preg_match( $regexp, $whereStr ) ) {
                                throw new MWException( "Error in \"where\" 
parameter: the string \"$displayString\" cannot be used within #cargo_query." );
                        }
                }
-               $simplifiedWhereStr = str_replace( array( '\"', "\'" ), '', 
$whereStr );
-               $simplifiedWhereStr = preg_replace( '/"[^"]*"/', '', 
$simplifiedWhereStr );
-               $simplifiedWhereStr = preg_replace( "/'[^']*'/", '', 
$simplifiedWhereStr );
+               $noQuotesPattern = '/([\'"]).*?\1/';
+               $noQuotesFieldsStr = preg_replace( $noQuotesPattern, '', 
$fieldsStr );
+               $noQuotesWhereStr = preg_replace( $noQuotesPattern, '', 
$whereStr );
+               $noQuotesJoinOnStr = preg_replace( $noQuotesPattern, '', 
$joinOnStr );
+               $noQuotesGroupByStr = preg_replace( $noQuotesPattern, '', 
$groupByStr );
+               $noQuotesHavingStr = preg_replace( $noQuotesPattern, '', 
$havingStr );
+               $noQuotesOrderByStr = preg_replace( $noQuotesPattern, '', 
$orderByStr );
 
                $regexps = array(
                        '/\bselect\b/i' => 'SELECT',
@@ -123,22 +122,22 @@
                );
                foreach ( $regexps as $regexp => $displayString ) {
                        if ( preg_match( $regexp, $tablesStr ) ||
-                               preg_match( $regexp, $fieldsStr ) ||
-                               preg_match( $regexp, $simplifiedWhereStr ) ||
-                               preg_match( $regexp, $joinOnStr ) ||
-                               preg_match( $regexp, $groupByStr ) ||
-                               preg_match( $regexp, $havingStr ) ||
-                               preg_match( $regexp, $orderByStr ) ||
+                               preg_match( $regexp, $noQuotesFieldsStr ) ||
+                               preg_match( $regexp, $noQuotesWhereStr ) ||
+                               preg_match( $regexp, $noQuotesJoinOnStr ) ||
+                               preg_match( $regexp, $noQuotesGroupByStr ) ||
+                               preg_match( $regexp, $noQuotesHavingStr ) ||
+                               preg_match( $regexp, $noQuotesOrderByStr ) ||
                                preg_match( $regexp, $limitStr ) ) {
                                throw new MWException( "Error: the string 
\"$displayString\" cannot be used within #cargo_query." );
                        }
                }
 
-               self::getAndValidateSQLFunctions( $simplifiedWhereStr );
-               self::getAndValidateSQLFunctions( $joinOnStr );
-               self::getAndValidateSQLFunctions( $groupByStr );
-               self::getAndValidateSQLFunctions( $havingStr );
-               self::getAndValidateSQLFunctions( $orderByStr );
+               self::getAndValidateSQLFunctions( $noQuotesWhereStr );
+               self::getAndValidateSQLFunctions( $noQuotesJoinOnStr );
+               self::getAndValidateSQLFunctions( $noQuotesGroupByStr );
+               self::getAndValidateSQLFunctions( $noQuotesHavingStr );
+               self::getAndValidateSQLFunctions( $noQuotesOrderByStr );
                self::getAndValidateSQLFunctions( $limitStr );
        }
 
@@ -340,7 +339,7 @@
                global $wgCargoAllowedSQLFunctions;
 
                $sqlFunctionMatches = array();
-               $sqlFunctionRegex = '/(\b|\W)(\w*?)\s?\(/';
+               $sqlFunctionRegex = '/(\b|\W)(\w*?)\s*\(/';
                preg_match_all( $sqlFunctionRegex, $str, $sqlFunctionMatches );
                $sqlFunctions = array_map( 'strtoupper', $sqlFunctionMatches[2] 
);
                // Throw an error if any of these functions
@@ -376,23 +375,54 @@
                $this->mFieldTables = array();
                foreach ( $this->mAliasedFieldNames as $alias => $origFieldName 
) {
                        $tableName = null;
-                       if ( strpos( $origFieldName, '.' ) !== false ) {
-                               // This could probably be done better with
-                               // regexps.
-                               list( $tableName, $fieldName ) = explode( '.', 
$origFieldName, 2 );
-                       } else {
-                               $fieldName = $origFieldName;
-                               $tableName = null;
-                       }
+                       $fieldName = null;
                        $description = new CargoFieldDescription();
+
+                       $fieldPattern = '/^([\w$]+)([.]([\w$]+))?$/';
+                       $fieldPatternFound = preg_match( $fieldPattern, 
$origFieldName, $fieldPatternMatches );
+                       $stringPatternFound = false;
+                       $hasFunctionCall = false;
+
+                       if ( $fieldPatternFound ) {
+                               switch ( count( $fieldPatternMatches ) ) {
+                                       case 2:
+                                               $fieldName = 
$fieldPatternMatches[1];
+                                               break;
+                                       case 4:
+                                               $tableName = 
$fieldPatternMatches[1];
+                                               $fieldName = 
$fieldPatternMatches[3];
+                                               break;
+                               }
+                       } else {
+                               $stringPattern = '/^(([\'"]).*?\2)(.+)?$/';
+                               $stringPatternFound = preg_match( 
$stringPattern, $origFieldName, $stringPatternMatches );
+                               if ( $stringPatternFound ) {
+                                       // If the count is 3 we have a single 
quoted string
+                                       // If the count is 4 we have stuff 
after it
+                                       $stringPatternFound = count( 
$stringPatternMatches ) == 3;
+                               }
+
+                               if ( ! $stringPatternFound ) {
+                                       $noQuotesPattern = '/([\'"]).*?\1/';
+                                       $noQuotesOrigFieldName = preg_replace( 
$noQuotesPattern, '', $origFieldName);
+
+                                       $functionCallPattern = '/\w\s*\(/';
+                                       $hasFunctionCall = preg_match( 
$functionCallPattern, $noQuotesOrigFieldName );
+                               }
+                       }
+
                        // If it's a pre-defined field, we probably know its
                        // type.
                        if ( $fieldName == '_ID' || $fieldName == '_rowID' || 
$fieldName == '_pageID' ) {
                                $description->mType = 'Integer';
+                       } elseif ( $fieldName == '_pageTitle' ) {
+                               // It's a string - do nothing.
                        } elseif ( $fieldName == '_pageName' ) {
                                $description->mType = 'Page';
-                       } elseif ( strpos( $origFieldName, '(' ) !== false ) {
-                               $sqlFunctions = 
self::getAndValidateSQLFunctions( $origFieldName );
+                       } elseif ( $stringPatternFound ) {
+                               // It's a quoted, literal string - do nothing.
+                       } elseif ( $hasFunctionCall ) {
+                               $sqlFunctions = 
self::getAndValidateSQLFunctions( $noQuotesOrigFieldName );
                                $firstFunction = $sqlFunctions[0];
                                if ( in_array( $firstFunction, array( 'COUNT', 
'FLOOR', 'CEIL', 'ROUND' ) ) ) {
                                        $description->mType = 'Integer';
@@ -404,8 +434,6 @@
                                }
                                // If it's anything else ('CONCAT', 'SUBSTRING',
                                // etc. etc.), we don't have to do anything.
-                       } elseif ( preg_match( "/^'.*'$/m", $fieldName ) ) {
-                               // It's a quoted, literal string - do nothing.
                        } else {
                                // It's a standard field - though if it's
                                // '_value', or ends in '__full', it's actually
@@ -526,11 +554,6 @@
                return false;
        }
 
-       function whereStringHasPattern( $searchPattern ) {
-               $searchPattern = "/(^|\s|\()$searchPattern\s/";
-               return preg_match( $searchPattern, $this->mWhereStr, $matches );
-       }
-
        function handleVirtualFields() {
                // The array-field alias can be found in a number of different
                // clauses. Handling depends on which clause it is:
@@ -565,61 +588,56 @@
                foreach ( $virtualFields as $virtualField ) {
                        $fieldName = $virtualField['fieldName'];
                        $tableName = $virtualField['tableName'];
+                       $foundLikeMatch1 = $foundMatch1 = $foundLikeMatch2 = 
$foundMatch2 = false;
+                       $patternSuffix = '\s+(HOLDS)\s+(LIKE)?/i';
+                       $throwException = false;
 
-                       // Is this field in the "where" string at all? If not,
-                       // skip it.
-                       $pattern1 = "$tableName\.$fieldName";
-                       $foundMatch1 = $this->whereStringHasPattern( $pattern1 
);
-                       if ( !$foundMatch1 ) {
-                               $pattern2 = $fieldName;
-                               $foundMatch2 = $this->whereStringHasPattern( 
$pattern2 );
-                               if ( !$foundMatch2 ) {
-                                       continue;
+                       $simplePattern1 = 
CargoUtils::getSQLTableAndFieldPattern( $tableName, $fieldName );
+                       if ( preg_match( $simplePattern1, $this->mWhereStr ) ) {
+                               $likePattern1 = 
CargoUtils::getSQLTableAndFieldPattern( $tableName, $fieldName, false ).
+                                               $patternSuffix;
+                               if ( preg_match( $likePattern1, 
$this->mWhereStr, $matches ) ) {
+                                       $foundMatch1 = count( $matches ) == 3;
+                                       $foundLikeMatch1 = count( $matches ) == 
4;
                                }
+                               $throwException = $throwException || (! 
$foundMatch1 && ! $foundLikeMatch1);
                        }
-
-                       $likePattern1 = "$tableName\.$fieldName\s*HOLDS\s*LIKE";
-                       $foundLikeMatch1 = $this->whereStringHasPattern( 
$likePattern1 );
-                       $foundLikeMatch2 = $foundMatch1 = $foundMatch2 = false;
-                       if ( !$foundLikeMatch1 ) {
-                               $likePattern2 = "$fieldName\s*HOLDS\s*LIKE";
-                               $foundLikeMatch2 = 
$this->whereStringHasPattern( $likePattern2 );
-                       }
-
-                       if ( !$foundLikeMatch1 && !$foundLikeMatch2 ) {
-                               $holdsPattern1 = 
"$tableName\.$fieldName\s*HOLDS";
-                               $foundHoldsMatch1 = 
$this->whereStringHasPattern( $holdsPattern1 );
-                               if ( !$foundHoldsMatch1 ) {
-                                       $holdsPattern2 = "$fieldName\s*HOLDS";
-                                       $foundHoldsMatch2 = 
$this->whereStringHasPattern( $holdsPattern2 );
+                       $simplePattern2 = CargoUtils::getSQLFieldPattern( 
$fieldName );
+                       if ( preg_match( $simplePattern2, $this->mWhereStr ) ) {
+                               $likePattern2 = CargoUtils::getSQLFieldPattern( 
$fieldName, false ).
+                                               $patternSuffix;
+                               if ( preg_match( $likePattern2, 
$this->mWhereStr, $matches ) ) {
+                                       $foundMatch2 = count( $matches ) == 3;
+                                       $foundLikeMatch2 = count( $matches ) == 
4;
                                }
+                               $throwException = $throwException || (! 
$foundMatch2 && ! $foundLikeMatch2);
+                       }
+                       if ( $throwException ) {
+                               throw new MWException( "Error: operator for the 
virtual field '" .
+                                       "$tableName.$fieldName' must be 'HOLDS' 
or 'HOLDS LIKE'." );
                        }
 
-                       // If no "HOLDS", throw an error.
-                       if ( !$foundLikeMatch1 && !$foundLikeMatch2 && 
!$foundHoldsMatch1 && !$foundHoldsMatch2 ) {
-                               throw new MWException( "Error: operator for the 
virtual field '"
-                               . "$tableName.$fieldName' must be 'HOLDS' or 
'HOLDS LIKE'." );
-                       }
-
-                       $fieldTableName = $tableName . '__' . $fieldName;
-                       $this->addFieldTableToTableNames( $fieldTableName, 
$tableName );
-                       $this->mCargoJoinConds[] = array(
-                               'joinType' => 'LEFT OUTER JOIN',
-                               'table1' => $tableName,
-                               'field1' => '_ID',
-                               'table2' => $fieldTableName,
-                               'field2' => '_rowID'
-                       );
-                       if ( $foundLikeMatch1 ) {
-                               $this->mWhereStr = preg_replace( 
"/$likePattern1/", "$fieldTableName._value LIKE",
-                                       $this->mWhereStr );
-                       } elseif ( $foundLikeMatch2 ) {
-                               $this->mWhereStr = preg_replace( 
"/$likePattern2/", "$fieldTableName._value LIKE",
-                                       $this->mWhereStr );
-                       } elseif ( $foundHoldsMatch1 ) {
-                               $this->mWhereStr = preg_replace( 
"/$holdsPattern1/", "$fieldTableName._value=", $this->mWhereStr );
-                       } elseif ( $foundHoldsMatch2 ) {
-                               $this->mWhereStr = preg_replace( 
"/$holdsPattern2/", "$fieldTableName._value=", $this->mWhereStr );
+                       if ( $foundLikeMatch1 || $foundLikeMatch2 || 
$foundMatch1 || $foundMatch2 ) {
+                               $fieldTableName = $tableName . '__' . 
$fieldName;
+                               $this->addFieldTableToTableNames( 
$fieldTableName, $tableName );
+                               $this->mCargoJoinConds[] = array(
+                                       'joinType' => 'LEFT OUTER JOIN',
+                                       'table1' => $tableName,
+                                       'field1' => '_ID',
+                                       'table2' => $fieldTableName,
+                                       'field2' => '_rowID'
+                               );
+                               if ( $foundLikeMatch1 ) {
+                                       $this->mWhereStr = preg_replace( 
$likePattern1, "$fieldTableName._value LIKE ",
+                                               $this->mWhereStr );
+                               } elseif ( $foundLikeMatch2 ) {
+                                       $this->mWhereStr = preg_replace( 
$likePattern2, "$fieldTableName._value LIKE ",
+                                               $this->mWhereStr );
+                               } elseif ( $foundMatch1 ) {
+                                       $this->mWhereStr = preg_replace( 
$likePattern1, "$fieldTableName._value=", $this->mWhereStr );
+                               } elseif ( $foundMatch2 ) {
+                                       $this->mWhereStr = preg_replace( 
$likePattern2, "$fieldTableName._value=", $this->mWhereStr );
+                               }
                        }
                }
 
@@ -672,9 +690,9 @@
                foreach ( $virtualFields as $virtualField ) {
                        $fieldName = $virtualField['fieldName'];
                        $tableName = $virtualField['tableName'];
-                       $pattern1 = "/(^|\s)$tableName\.$fieldName($|\s)/";
+                       $pattern1 = CargoUtils::getSQLTableAndFieldPattern( 
$tableName, $fieldName );
                        $foundMatch1 = preg_match( $pattern1, 
$this->mGroupByStr, $matches );
-                       $pattern2 = "/(^|\s)$fieldName($|\s)/";
+                       $pattern2 = CargoUtils::getSQLFieldPattern( $fieldName 
);
                        $foundMatch2 = false;
 
                        if ( !$foundMatch1 ) {
@@ -746,9 +764,9 @@
                foreach ( $virtualFields as $virtualField ) {
                        $fieldName = $virtualField['fieldName'];
                        $tableName = $virtualField['tableName'];
-                       $pattern1 = "/(^|\s)$tableName\.$fieldName($|\s)/";
+                       $pattern1 = CargoUtils::getSQLTableAndFieldPattern( 
$tableName, $fieldName );
                        $foundMatch1 = preg_match( $pattern1, 
$this->mOrderByStr, $matches );
-                       $pattern2 = "/(^|\s)$fieldName($|\s)/";
+                       $pattern2 = CargoUtils::getSQLFieldPattern( $fieldName 
);
                        $foundMatch2 = false;
 
                        if ( !$foundMatch1 ) {
@@ -842,19 +860,21 @@
                foreach ( $coordinateFields as $coordinateField ) {
                        $fieldName = $coordinateField['fieldName'];
                        $tableName = $coordinateField['tableName'];
-                       $pattern1 = 
"/(^|\s|\()$tableName\.$fieldName(\s*NEAR\s*)\(([^)]*)\)/";
+                       $patternSuffix = '(\s+NEAR\s*)\(([^)]*)\)/i';
+
+                       $pattern1 = CargoUtils::getSQLTableAndFieldPattern( 
$tableName, $fieldName, false ) . $patternSuffix;
                        $foundMatch1 = preg_match( $pattern1, $this->mWhereStr, 
$matches );
                        if ( !$foundMatch1 ) {
-                               $pattern2 = 
"/(^|\s|\()$fieldName(\s*NEAR\s*)\(([^)]*)\)/";
+                               $pattern2 = CargoUtils::getSQLFieldPattern( 
$fieldName, false ) . $patternSuffix;
                                $foundMatch2 = preg_match( $pattern2, 
$this->mWhereStr, $matches );
                        }
                        if ( $foundMatch1 || $foundMatch2 ) {
                                // If no "NEAR", throw an error.
-                               if ( count( $matches ) != 3 ) {
+                               if ( count( $matches ) != 4 ) {
                                        throw new MWException( "Error: operator 
for the virtual coordinates field "
                                        . "'$tableName.$fieldName' must be 
'NEAR'." );
                                }
-                               $coordinatesAndDistance = explode( ',', 
$matches[2] );
+                               $coordinatesAndDistance = explode( ',', 
$matches[3] );
                                if ( count( $coordinatesAndDistance ) != 3 ) {
                                        throw new MWException( "Error: value 
for the 'NEAR' operator must be of the form "
                                        . "\"(latitude, longitude, 
distance)\"." );
@@ -1034,10 +1054,7 @@
                $tableNamePatterns = array();
                $tableNameReplacements = array();
                foreach ( $this->mTableNames as $tableName ) {
-                       // Is there a way to do this with just one regexp?
-                       $tableNamePatterns[] = "/^$tableName\./";
-                       $tableNamePatterns[] = "/(\W)$tableName\./";
-                       $tableNameReplacements[] = $cdb->tableName( $tableName 
) . ".";
+                       $tableNamePatterns[] = 
CargoUtils::getSQLTablePattern($tableName);
                        $tableNameReplacements[] = "$1" . $cdb->tableName( 
$tableName ) . ".";
                }
 
diff --git a/CargoUtils.php b/CargoUtils.php
index 8799fa9..f92971c 100644
--- a/CargoUtils.php
+++ b/CargoUtils.php
@@ -188,8 +188,10 @@
        }
 
        /**
-        * Splits a string by the delimiter, but ignores delimiters contained
-        * within parentheses.
+        * Splits a string by the delimiter, but ensures that parenthesis, 
separators
+        * and "the other quote" (single quote in a double quoted string or 
double
+        * quote in a single quoted string) inside a quoted string are not 
considered
+        * lexically.
         */
        static function smartSplit( $delimiter, $string ) {
                if ( $string == '' ) {
@@ -199,8 +201,6 @@
                $ignoreNextChar = false;
                $returnValues = array();
                $numOpenParentheses = 0;
-               $numOpenSingleQuotes = 0;
-               $numOpenDoubleQuotes = 0;
                $curReturnValue = '';
 
                for ( $i = 0; $i < strlen( $string ); $i++ ) {
@@ -216,26 +216,18 @@
                                $numOpenParentheses++;
                        } elseif ( $curChar == ')' ) {
                                $numOpenParentheses--;
-                       } elseif ( $curChar == '\'' ) {
-                               if ( $numOpenSingleQuotes == 0 ) {
-                                       $numOpenSingleQuotes = 1;
-                               } else {
-                                       $numOpenSingleQuotes = 0;
+                       } elseif ( $curChar == '\'' || $curChar == '"' ) {
+                               $pos = strpos( $string, $curChar, $i + 1 );
+                               if ( $pos === false ) {
+                                       throw new MWException( "Error: 
unmatched quote in SQL string constant." );
                                }
-                       } elseif ( $curChar == '"' ) {
-                               if ( $numOpenDoubleQuotes == 0 ) {
-                                       $numOpenDoubleQuotes = 1;
-                               } else {
-                                       $numOpenDoubleQuotes = 0;
-                               }
+                               $curReturnValue .= substr( $string, $i, $pos - 
$i );
+                               $i = $pos;
                        } elseif ( $curChar == '\\' ) {
                                $ignoreNextChar = true;
                        }
 
-                       if ( $curChar == $delimiter &&
-                       $numOpenParentheses == 0 &&
-                       $numOpenSingleQuotes == 0 &&
-                       $numOpenDoubleQuotes == 0 ) {
+                       if ( $curChar == $delimiter && $numOpenParentheses == 0 
) {
                                $returnValues[] = trim( $curReturnValue );
                                $curReturnValue = '';
                        } else {
@@ -248,6 +240,37 @@
        }
 
        /**
+        * Generates a Regular Expression to match $fieldName in a SQL string.
+        * Allows for $ as valid identifier character.
+        */
+       public static function getSQLFieldPattern( $fieldName, $closePattern = 
true ) {
+               $fieldName = str_replace( '$', '\$', $fieldName );
+               $pattern = '/([^\w$.]|^)'.$fieldName;
+               return $pattern . ( $closePattern ? '([^\w$]|$)/i' : '' );
+       }
+
+       /**
+        * Generates a Regular Expression to match $tableName.$fieldName in a
+        * SQL string. Allows for $ as valid identifier character.
+        */
+       public static function getSQLTableAndFieldPattern( $tableName, 
$fieldName, $closePattern = true ) {
+               $fieldName = str_replace( '$', '\$', $fieldName );
+               $tableName = str_replace( '$', '\$', $tableName );
+               $pattern = '/([^\w$]|^)'.$tableName.'\.'.$fieldName;
+               return $pattern . ( $closePattern ? '([^\w$]|$)/i' : '' );
+       }
+
+       /**
+        * Generates a Regular Expression to match $tableName in a SQL string.
+        * Allows for $ as valid identifier character.
+        */
+       public static function getSQLTablePattern( $tableName, $closePattern = 
true ) {
+               $tableName = str_replace( '$', '\$', $tableName );
+               $pattern = '/([^\w$]|^)'.$tableName.'\.';
+               return $pattern . ( $closePattern ? '/i' : '' );
+       }
+
+       /**
         * Parse a piece of wikitext differently depending on whether
         * we're in a special or regular page.
         *

-- 
To view, visit https://gerrit.wikimedia.org/r/258753
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I56e9bee6237e1bfd66889bf9a63b2b3216ebd2d3
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Cargo
Gerrit-Branch: master
Gerrit-Owner: Ed Hoo <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to