http://www.mediawiki.org/wiki/Special:Code/MediaWiki/73595

Revision: 73595
Author:   aj
Date:     2010-09-23 06:49:42 +0000 (Thu, 23 Sep 2010)

Log Message:
-----------
added mediawiki patch

Added Paths:
-----------
    trunk/parsers/libmwparser/mediawiki.patch

Added: trunk/parsers/libmwparser/mediawiki.patch
===================================================================
--- trunk/parsers/libmwparser/mediawiki.patch                           (rev 0)
+++ trunk/parsers/libmwparser/mediawiki.patch   2010-09-23 06:49:42 UTC (rev 73595)
@@ -0,0 +1,478 @@
+Index: includes/parser/Parser.php
+===================================================================
+--- includes/parser/Parser.php (revision 69308)
++++ includes/parser/Parser.php (arbetskopia)
+@@ -7,6 +7,9 @@
+  * File for Parser and related classes
+  */
+ 
++include_once('MediaWiki/mwp.php');
++include_once('LinkResolver.php');
++define('USE_LIBMWPARSER', true);
+ 
+ /**
+  * PHP Parser - Processes wiki markup (which uses a more user-friendly
+@@ -114,6 +117,7 @@
+       var $mRevisionTimestamp; # The timestamp of the specified revision ID
+       var $mRevIdForTs;   # The revision ID which was used to fetch the 
timestamp
+ 
++
+       /**
+        * Constructor
+        *
+@@ -262,13 +266,17 @@
+               wfProfileIn( __METHOD__ );
+               wfProfileIn( $fname );
+ 
++
+               if ( $clearState ) {
+                       $this->clearState();
+               }
+ 
+               $this->mOptions = $options;
+               $this->setTitle( $title ); # Page title has to be set for the 
pre-processor
++                $rawTitle = $title;
+ 
++                $rawText = $text;
++
+               $oldRevisionId = $this->mRevisionId;
+               $oldRevisionTimestamp = $this->mRevisionTimestamp;
+               if ( $revid !== null ) {
+@@ -281,143 +289,156 @@
+               wfRunHooks( 'ParserAfterStrip', array( &$this, &$text, 
&$this->mStripState ) );
+               $text = $this->internalParse( $text );
+ 
+-              $text = $this->mStripState->unstripGeneral( $text );
++                if (!USE_LIBMWPARSER) {
++                    $text = $this->mStripState->unstripGeneral( $text );
+ 
+-              # Clean up special characters, only run once, next-to-last 
before doBlockLevels
+-              $fixtags = array(
+-                      # french spaces, last one Guillemet-left
+-                      # only if there is something before the space
+-                      '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 \\2',
+-                      # french spaces, Guillemet-right
+-                      '/(\\302\\253) /' => '\\1 ',
+-                      '/ (!\s*important)/' => ' \\1', # Beware of CSS 
magic word !important, bug #11874.
+-              );
+-              $text = preg_replace( array_keys( $fixtags ), array_values( 
$fixtags ), $text );
++                    # Clean up special characters, only run once, 
next-to-last before doBlockLevels
++                        $fixtags = array(
++                                         # french spaces, last one 
Guillemet-left
++                                         # only if there is something before 
the space
++                                         '/(.) (?=\\?|:|;|!|%|\\302\\273)/' 
=> '\\1 \\2',
++                                         # french spaces, Guillemet-right
++                                         '/(\\302\\253) /' => '\\1 ',
++                                         '/ (!\s*important)/' => ' \\1', 
# Beware of CSS magic word !important, bug #11874.
++                                         );
++                    $text = preg_replace( array_keys( $fixtags ), 
array_values( $fixtags ), $text );
+ 
+-              $text = $this->doBlockLevels( $text, $linestart );
++                    $text = $this->doBlockLevels( $text, $linestart );
+ 
+-              $this->replaceLinkHolders( $text );
++                    $this->replaceLinkHolders( $text );
+ 
+-              /**
+-               * The page doesn't get language converted if
+-               * a) It's disabled
+-               * b) Content isn't converted
+-               * c) It's a conversion table
+-               */
+-              if ( !( $wgDisableLangConversion
+-                              || isset( 
$this->mDoubleUnderscores['nocontentconvert'] )
+-                              || $this->mTitle->isConversionTable() ) ) {
++                    /**
++                     * The page doesn't get language converted if
++                     * a) It's disabled
++                     * b) Content isn't converted
++                     * c) It's a conversion table
++                     */
++                    if ( !( $wgDisableLangConversion
++                            || isset( 
$this->mDoubleUnderscores['nocontentconvert'] )
++                            || $this->mTitle->isConversionTable() ) ) {
+ 
+                       # The position of the convert() call should not be 
changed. it
+-                      # assumes that the links are all replaced and the only 
thing left
+-                      # is the <nowiki> mark.
++                            # assumes that the links are all replaced and the 
only thing left
++                            # is the <nowiki> mark.
+ 
+-                      $text = $wgContLang->convert( $text );
+-              }
++                            $text = $wgContLang->convert( $text );
++                    }
+ 
+-              /**
+-               * A page get its title converted except:
+-               * a) Language conversion is globally disabled
+-               * b) Title convert is globally disabled
+-               * c) The page is a redirect page
+-               * d) User request with a "linkconvert" set to "no"
+-               * e) A "nocontentconvert" magic word has been set
+-               * f) A "notitleconvert" magic word has been set
+-               * g) User sets "noconvertlink" in his/her preference
+-               *
+-               * Note that if a user tries to set a title in a conversion
+-               * rule but content conversion was not done, then the parser
+-               * won't pick it up.  This is probably expected behavior.
+-               */
+-              if ( !( $wgDisableLangConversion
+-                              || $wgDisableTitleConversion
+-                              || isset( 
$this->mDoubleUnderscores['nocontentconvert'] )
+-                              || isset( 
$this->mDoubleUnderscores['notitleconvert'] )
+-                              || $this->mOutput->getDisplayTitle() !== false 
) ) 
+-              {
+-                      $convruletitle = $wgContLang->getConvRuleTitle();
+-                      if ( $convruletitle ) {
++                    /**
++                     * A page get its title converted except:
++                     * a) Language conversion is globally disabled
++                     * b) Title convert is globally disabled
++                     * c) The page is a redirect page
++                     * d) User request with a "linkconvert" set to "no"
++                     * e) A "nocontentconvert" magic word has been set
++                     * f) A "notitleconvert" magic word has been set
++                     * g) User sets "noconvertlink" in his/her preference
++                     *
++                     * Note that if a user tries to set a title in a 
conversion
++                     * rule but content conversion was not done, then the 
parser
++                     * won't pick it up.  This is probably expected behavior.
++                     */
++                    if ( !( $wgDisableLangConversion
++                            || $wgDisableTitleConversion
++                            || isset( 
$this->mDoubleUnderscores['nocontentconvert'] )
++                            || isset( 
$this->mDoubleUnderscores['notitleconvert'] )
++                            || $this->mOutput->getDisplayTitle() !== false ) 
) 
++                        {
++                            $convruletitle = $wgContLang->getConvRuleTitle();
++                            if ( $convruletitle ) {
+                               $this->mOutput->setTitleText( $convruletitle );
+-                      } else {
++                            } else {
+                               $titleText = $wgContLang->convertTitle( $title 
);
+                               $this->mOutput->setTitleText( $titleText );
+-                      }
+-              }
++                            }
++                        }
+ 
+-              $text = $this->mStripState->unstripNoWiki( $text );
++                    $text = $this->mStripState->unstripNoWiki( $text );
+ 
+-              wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
++                    wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
+ 
+-//!JF Move to its own function
++                    //!JF Move to its own function
+ 
+-              $uniq_prefix = $this->mUniqPrefix;
+-              $matches = array();
+-              $elements = array_keys( $this->mTransparentTagHooks );
+-              $text = self::extractTagsAndParams( $elements, $text, $matches, 
$uniq_prefix );
++                    $uniq_prefix = $this->mUniqPrefix;
++                    $matches = array();
++                    $elements = array_keys( $this->mTransparentTagHooks );
++                    $text = self::extractTagsAndParams( $elements, $text, 
$matches, $uniq_prefix );
+ 
+-              foreach ( $matches as $marker => $data ) {
++                    foreach ( $matches as $marker => $data ) {
+                       list( $element, $content, $params, $tag ) = $data;
+                       $tagName = strtolower( $element );
+                       if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
+-                              $output = call_user_func_array( 
$this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
++                            $output = call_user_func_array( 
$this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
+                       } else {
+-                              $output = $tag;
++                            $output = $tag;
+                       }
+                       $this->mStripState->general->setPair( $marker, $output 
);
+-              }
+-              $text = $this->mStripState->unstripGeneral( $text );
++                    }
++                    $text = $this->mStripState->unstripGeneral( $text );
+ 
+-              $text = Sanitizer::normalizeCharReferences( $text );
++                    $text = Sanitizer::normalizeCharReferences( $text );
+ 
+-              if ( ( $wgUseTidy && $this->mOptions->mTidy ) || 
$wgAlwaysUseTidy ) {
++                    if ( ( $wgUseTidy && $this->mOptions->mTidy ) || 
$wgAlwaysUseTidy ) {
+                       $text = MWTidy::tidy( $text );
+-              } else {
++                    } else {
+                       # attempt to sanitize at least some nesting problems
+-                      # (bug #2702 and quite a few others)
+-                      $tidyregs = array(
+-                              # ''Something [http://www.cool.com cool''] -->
+-                              # <i>Something</i><a 
href="http://www.cool.com";..><i>cool></i></a>
+-                              
'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
+-                              '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
+-                              # fix up an anchor inside another anchor, only
+-                              # at least for a single single nested link (bug 
3695)
+-                              
'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
+-                              '\\1\\2</a>\\3</a>\\1\\4</a>',
+-                              # fix div inside inline elements- doBlockLevels 
won't wrap a line which
+-                              # contains a div, so fix it up here; replace
+-                              # div with escaped text
+-                              '/(<([aib]) 
[^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
+-                              '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
+-                              # remove empty italic or bold tag pairs, some
+-                              # introduced by rules above
+-                              '/<([bi])><\/\\1>/' => '',
+-                      );
++                            # (bug #2702 and quite a few others)
++                            $tidyregs = array(
++                                              # ''Something 
[http://www.cool.com cool''] -->
++                                              # <i>Something</i><a 
href="http://www.cool.com";..><i>cool></i></a>
++                                              
'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
++                                              
'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
++                                              # fix up an anchor inside 
another anchor, only
++                                              # at least for a single single 
nested link (bug 3695)
++                                              
'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
++                                              '\\1\\2</a>\\3</a>\\1\\4</a>',
++                                              # fix div inside inline 
elements- doBlockLevels won't wrap a line which
++                                              # contains a div, so fix it up 
here; replace
++                                              # div with escaped text
++                                              '/(<([aib]) 
[^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
++                                              
'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
++                                              # remove empty italic or bold 
tag pairs, some
++                                              # introduced by rules above
++                                              '/<([bi])><\/\\1>/' => '',
++                                              );
+ 
+                       $text = preg_replace(
+-                              array_keys( $tidyregs ),
+-                              array_values( $tidyregs ),
+-                              $text );
+-              }
+-              global $wgExpensiveParserFunctionLimit;
+-              if ( $this->mExpensiveFunctionCount > 
$wgExpensiveParserFunctionLimit ) {
++                                             array_keys( $tidyregs ),
++                                             array_values( $tidyregs ),
++                                             $text );
++                    }
++                    global $wgExpensiveParserFunctionLimit;
++                    if ( $this->mExpensiveFunctionCount > 
$wgExpensiveParserFunctionLimit ) {
+                       $this->limitationWarn( 'expensive-parserfunction', 
$this->mExpensiveFunctionCount, $wgExpensiveParserFunctionLimit );
+-              }
++                    }
+ 
+-              wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
++                    wfRunHooks( 'ParserAfterTidy', array( &$this, &$text ) );
+ 
+-              # Information on include size limits, for the benefit of users 
who try to skirt them
+-              if ( $this->mOptions->getEnableLimitReport() ) {
+-                      $max = $this->mOptions->getMaxIncludeSize();
+-                      $PFreport = "Expensive parser function count: 
{$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
+-                      $limitReport =
+-                              "NewPP limit report\n" .
+-                              "Preprocessor node count: 
{$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
+-                              "Post-expand include size: 
{$this->mIncludeSizes['post-expand']}/$max bytes\n" .
+-                              "Template argument size: 
{$this->mIncludeSizes['arg']}/$max bytes\n".
+-                              $PFreport;
+-                      wfRunHooks( 'ParserLimitReport', array( $this, 
&$limitReport ) );
+-                      $text .= "\n<!-- \n$limitReport-->\n";
+-              }
++                    # Information on include size limits, for the benefit of 
users who try to skirt them
++                    if ( $this->mOptions->getEnableLimitReport() ) {
++                        $max = $this->mOptions->getMaxIncludeSize();
++                        $PFreport = "Expensive parser function count: 
{$this->mExpensiveFunctionCount}/$wgExpensiveParserFunctionLimit\n";
++                        $limitReport =
++                            "NewPP limit report\n" .
++                            "Preprocessor node count: 
{$this->mPPNodeCount}/{$this->mOptions->mMaxPPNodeCount}\n" .
++                            "Post-expand include size: 
{$this->mIncludeSizes['post-expand']}/$max bytes\n" .
++                            "Template argument size: 
{$this->mIncludeSizes['arg']}/$max bytes\n".
++                            $PFreport;
++                        wfRunHooks( 'ParserLimitReport', array( $this, 
&$limitReport ) );
++                        $text .= "\n<!-- \n$limitReport-->\n";
++                    }
++                } else {
++                    # libmwparse
++                    $istream = new_MWParserInput($text);
++                    
++                    $parser = new_MWParser($istream);
++
++                    $text = implode('', MWParseArticle($parser));
++                    delete_MWParser($parser);
++                    delete_MWParserInput($istream);
++
++                }
++                
+               $this->mOutput->setText( $text );
+ 
+               $this->mRevisionId = $oldRevisionId;
+@@ -1051,38 +1072,42 @@
+                       $text = $frame->expand( $dom );
+               } else {
+                       # if $frame is not provided, then use old-style 
replaceVariables
+-                      $text = $this->replaceVariables( $text );
++                        if (!USE_LIBMWPARSER) {
++                               $text = $this->replaceVariables( $text );
++                        }
+               }
+ 
+-              $text = Sanitizer::removeHTMLtags( $text, array( &$this, 
'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) );
+-              wfRunHooks( 'InternalParseBeforeLinks', array( &$this, &$text, 
&$this->mStripState ) );
++                if (!USE_LIBMWPARSER) {
++                    $text = Sanitizer::removeHTMLtags( $text, array( &$this, 
'attributeStripCallback' ), false, array_keys( $this->mTransparentTagHooks ) );
++                    wfRunHooks( 'InternalParseBeforeLinks', array( &$this, 
&$text, &$this->mStripState ) );
+ 
+-              # Tables need to come after variable replacement for things to 
work
+-              # properly; putting them before other transformations should 
keep
+-              # exciting things like link expansions from showing up in 
surprising
+-              # places.
+-              $text = $this->doTableStuff( $text );
++                    # Tables need to come after variable replacement for 
things to work
++                                                                              
      # properly; putting them before other transformations should keep
++                                                                              
                      # exciting things like link expansions from showing up in 
surprising
++                                                                              
                      # places.
++                                                                              
                      $text = $this->doTableStuff( $text );
+ 
+-              $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
+-
++                    $text = preg_replace( '/(^|\n)-----*'.'/', '\\1<hr />', 
$text );
++                }
+               $text = $this->doDoubleUnderscore( $text );
+-
+-              $text = $this->doHeadings( $text );
+-              if ( $this->mOptions->getUseDynamicDates() ) {
++                if (!USE_LIBMWPARSER) {
++                    $text = $this->doHeadings( $text );
++                    if ( $this->mOptions->getUseDynamicDates() ) {
+                       $df = DateFormatter::getInstance();
+                       $text = $df->reformat( 
$this->mOptions->getDateFormat(), $text );
+-              }
+-              $text = $this->replaceInternalLinks( $text );
+-              $text = $this->doAllQuotes( $text );
+-              $text = $this->replaceExternalLinks( $text );
++                    }
++                    $text = $this->replaceInternalLinks( $text );
++                    $text = $this->doAllQuotes( $text );
++                    $text = $this->replaceExternalLinks( $text );
+ 
+-              # replaceInternalLinks may sometimes leave behind
+-              # absolute URLs, which have to be masked to hide them from 
replaceExternalLinks
+-              $text = str_replace( $this->mUniqPrefix.'NOPARSE', '', $text );
++                    # replaceInternalLinks may sometimes leave behind
++                        # absolute URLs, which have to be masked to hide them 
from replaceExternalLinks
++                        $text = str_replace( $this->mUniqPrefix.'NOPARSE', 
'', $text );
+ 
+-              $text = $this->doMagicLinks( $text );
+-              $text = $this->formatHeadings( $text, $origText, $isMain );
+-
++                    $text = $this->doMagicLinks( $text );
++                    $text = $this->formatHeadings( $text, $origText, $isMain 
);
++                    
++                }
+               wfProfileOut( __METHOD__ );
+               return $text;
+       }
+Index: includes/parser/LinkResolver.php
+===================================================================
+--- includes/parser/LinkResolver.php   (revision 0)
++++ includes/parser/LinkResolver.php   (revision 0)
+@@ -0,0 +1,103 @@
++<?php
++
++include_once('MediaWiki/mwp.php');
++
++# TODO add support for GetLinkColors hook here
++
++function MWParserLinkResolverCallback($linkCollection) {
++        $lcKeys = MWLinkCollectionGet($linkCollection);
++        $linkCache = LinkCache::singleton();
++        $dbr = wfGetDB( DB_SLAVE );
++        $page = $dbr->tableName( 'page' );
++
++        $links = array();
++
++        foreach ($lcKeys as $lcKey) {
++                $type = MWLCKeyGetLinkType($lcKey);
++                if ($type == MWLT_EXTERNAL) {
++                        continue;
++                }
++                $title = Title::newFromText(MWLCKeyGetLinkTitle($lcKey));
++                if ($type == MWLT_INTERNAL) {
++                        $ns = $title->getNamespace();
++                        $links[$title->getDBKey()] = $lcKey;
++
++                        # Not in the link cache, add it to the query
++                                if ( !isset( $current ) ) {
++                                        $current = $ns;
++                                        $query =  "SELECT page_id, 
page_namespace, page_title, page_is_redirect, page_len, page_latest";
++                                        $query .= " FROM $page WHERE 
(page_namespace=$ns AND page_title IN(";
++                                } elseif ( $current != $ns ) {
++                                $current = $ns;
++                                $query .= ")) OR (page_namespace=$ns AND 
page_title IN(";
++                        } else {
++                                $query .= ', ';
++                        }
++
++                        $query .= $dbr->addQuotes( $title->getDBkey() );
++                } elseif ($type == MWLT_MEDIA) {
++                        $linkResolution = new_MWLINKRESOLUTION();
++                        $time = false;
++                        // TODO File hook
++                        $img  = wfFindFile( $title, array( 'time' => $time ) 
);
++                        $alt = htmlspecialchars( $title->getText(),  
ENT_QUOTES );
++                        MWLINKRESOLUTION_alt_set($linkResolution, $alt);
++                        MWLINKRESOLUTION_url_set($linkResolution,  
$title->getLocalURL() );
++                        if ($img) {
++                                
MWLINKRESOLUTION_imageUrl_set($linkResolution, 
htmlspecialchars($img->getURL()));
++                                MWLINKRESOLUTION_c_class_set($linkResolution, 
'internal');
++                                if ($img->getWidth()) {
++                                        
MWLINKRESOLUTION_imageWidth_set($linkResolution, $img->getWidth());
++                                }
++                                if ($img->getHeight()) {
++                                        
MWLINKRESOLUTION_imageHeight_set($linkResolution, $img->getHeight());
++                                }
++                        } else {
++                                MWLINKRESOLUTION_c_class_set($linkResolution, 
'new');
++                        }
++                        MWLinkCollectionResolve($linkCollection, $lcKey, 
$linkResolution);
++                }
++        }
++
++        if ( $query ) {
++                $query .= '))';
++
++                wfDebugLog('linkresolver', "Link resolver callback query: 
'$query'");
++                $res = $dbr->query( $query, __METHOD__ );
++
++                # Fetch data and form into an associative array
++                # non-existent = broken
++                while ( $s = $dbr->fetchObject($res) ) {
++                        $title = Title::makeTitle( $s->page_namespace, 
$s->page_title );
++                        $lcKey = $links[$title->getDBKey()];
++                        unset($links[$title->getDBKey()]);
++                        wfDebugLog('linkresolver', 'Resolving ' . 
MWLCKeyGetLinkTitle($lcKey) . ' to ' . $title->getLocalURL());
++                        $linkResolution = new_MWLINKRESOLUTION();
++                        MWLINKRESOLUTION_url_set($linkResolution, 
$title->getLocalURL());
++                        $linkCache->addGoodLinkObj( $s->page_id, $title, 
$s->page_len, $s->page_is_redirect, $s->page_latest );
++                        MWLINKRESOLUTION_color_set($linkResolution, 
MWLINKCOLOR_BLUE);
++                        MWLinkCollectionResolve($linkCollection, $lcKey, 
$linkResolution);
++                }
++                unset( $res );
++        }
++
++        foreach ($links as $lcKey) {
++                $title = Title::newFromText(MWLCKeyGetLinkTitle($lcKey));
++                $linkResolution = new_MWLINKRESOLUTION();
++
++                if ($title->isKnown()) {
++                        MWLINKRESOLUTION_url_set($linkResolution, 
$title->getLocalURL());
++                        MWLINKRESOLUTION_color_set($linkResolution, 
MWLINKCOLOR_BLUE);
++                } else {
++                        MWLINKRESOLUTION_url_set($linkResolution, 
$title->getLocalURL('action=edit&redlink=1'));
++                        MWLINKRESOLUTION_color_set($linkResolution, 
MWLINKCOLOR_RED);
++                }
++                MWLinkCollectionResolve($linkCollection, $lcKey, 
$linkResolution);
++        }
++
++
++        wfProfileOut( __METHOD__ );
++
++}
++
++?>
+\ No newline at end of file



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to