https://www.mediawiki.org/wiki/Special:Code/MediaWiki/112465

Revision: 112465
Author:   wikinaut
Date:     2012-02-27 01:10:57 +0000 (Mon, 27 Feb 2012)
Log Message:
-----------
Fix for ultra bug 30028. The RSS extension can parse RSS and ATOM feeds of 
different flavours. The PHP XML DOM XPath query now uses a namespace-safe 
method to find all elements like item (RSS, RDF) or entry (ATOM). Also fixed 
a hidden problem when the feed URL was redirecting: this threw the "Error 
parsing XML for RSS" error, which is now history. Introduced a new parameter 
$wgRSSUrlNumberOfAllowedRedirects, which defaults to zero, i.e. no redirects 
are allowed by default. See the manual page.

Modified Paths:
--------------
    trunk/extensions/RSS/RELEASE-NOTES
    trunk/extensions/RSS/RSS.i18n.php
    trunk/extensions/RSS/RSS.php
    trunk/extensions/RSS/RSSData.php
    trunk/extensions/RSS/RSSHooks.php
    trunk/extensions/RSS/RSSParser.php

Modified: trunk/extensions/RSS/RELEASE-NOTES
===================================================================
--- trunk/extensions/RSS/RELEASE-NOTES  2012-02-27 01:07:04 UTC (rev 112464)
+++ trunk/extensions/RSS/RELEASE-NOTES  2012-02-27 01:10:57 UTC (rev 112465)
@@ -10,13 +10,20 @@
   coming in. Then you could abort cleanly once it's gotten too much
   (otherwise using the defaults - PHP will abort the entire program when your
   memory usage gets too high)
-* bug 30028 "Error parsing XML for RSS" - improve and harden Extension:RSS when
-  parsing differently flavoured RSS feeds and ATOM feeds
 
+=== Version 2.10 2012-02-27 ===
+* final solution of bug 30028 "Error parsing XML for RSS" - improve and harden
+  Extension:RSS when parsing differently flavoured RSS feeds and ATOM feeds
+* new parameter $wgRSSUrlNumberOfAllowedRedirects (default = 0)
+  Some feed urls redirect. The new RSS version can deal with redirects,
+  but it must be expressly enabled. For example, you can set
+  $wgRSSUrlNumberOfAllowedRedirects = 1;
+
 === Version 2.01 2012-02-24 ===
 * "summary" element of ATOM feed items are shown
   which is handled like "description" element of RSS
 * handling of basic HTML layout tags <p> <br> <b> <i> <u> <s> in item 
description
+
 === Version 2.00 2012-02-24 ===
 * first version which can parse RSS and at least some ATOM feeds
   partial solution of bug 30028 "Error parsing XML for RSS" - improve and 
harden

Modified: trunk/extensions/RSS/RSS.i18n.php
===================================================================
--- trunk/extensions/RSS/RSS.i18n.php   2012-02-27 01:07:04 UTC (rev 112464)
+++ trunk/extensions/RSS/RSS.i18n.php   2012-02-27 01:10:57 UTC (rev 112465)
@@ -22,6 +22,7 @@
        'rss-ns-permission' => 'RSS is not allowed in this namespace',
        'rss-url-is-not-whitelisted' => '"$1" is not in the whitelist of 
allowed feeds. {{PLURAL:$3|$2 is the only allowed feed|The allowed feeds are as 
follows: $2}}.',
        'rss-empty-whitelist' => '"$1" is not in the whitelist of allowed 
feeds. There are no allowed feed URLs in the whitelist.',
+       'rss-deprecated-wgrssallowedfeeds-found' => 'The deprecated variable 
$wgRSSAllowedFeeds has been detected. Since RSS version 2.0 this variable has 
to be replaced by $wgRSSUrlWhitelist as described in the manual page 
Extension:RSS.',
        'rss-item' => '{{$1 | title = {{{title}}} | link = {{{link}}} | date = 
{{{date}}} | author = {{{author}}} | description = {{{description}}} }}',
        'rss-feed' => "<!--  the following are two alternative templates. The 
first is the basic default template for feeds -->; '''<span 
class='plainlinks'>[{{{link}}} {{{title}}}]</span>'''
 : {{{description}}}

Modified: trunk/extensions/RSS/RSS.php
===================================================================
--- trunk/extensions/RSS/RSS.php        2012-02-27 01:07:04 UTC (rev 112464)
+++ trunk/extensions/RSS/RSS.php        2012-02-27 01:10:57 UTC (rev 112465)
@@ -4,7 +4,7 @@
  *
  * @file
  * @ingroup Extensions
- * @version 2.01
+ * @version 2.10
  * @author mutante, Daniel Kinzler, Rdb, Mafs, Thomas Gries, Alxndr, Chris 
Reigrut, K001
  * @author Kellan Elliott-McCrea <[email protected]> -- author of MagpieRSS
  * @author Jeroen De Dauw
@@ -14,7 +14,7 @@
  * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation
  */
 
-define( "EXTENSION_RSS_VERSION", "2.01 20120224" );
+define( "EXTENSION_RSS_VERSION", "2.10 20120227" );
 
 if ( !defined( 'MEDIAWIKI' ) ) {
        die( "This is not a valid entry point.\n" );
@@ -52,7 +52,7 @@
 // for debugging set $wgRSSCacheCompare = 1;
 $wgRSSCacheCompare = false;
 
-// 5 second timeout
+// 15 second timeout
 $wgRSSFetchTimeout = 15;
 
 // Ignore the RSS tag in all but the namespaces listed here.
@@ -77,6 +77,11 @@
 // include "*" if you expressly want to allow all urls (you should not do this)
 // $wgRSSUrlWhitelist = array( "*" );
 
+// Maximum number of redirects to follow (defaults to 0)
+// Note: this should only be used when the target URLs are trusted,
+// to avoid attacks on intranet services accessible by HTTP.
+$wgRSSUrlNumberOfAllowedRedirects = 0;
+
 // Agent to use for fetching feeds
 $wgRSSUserAgent = "MediaWikiRSS/" . strtok( EXTENSION_RSS_VERSION, " " ) . " 
(+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension";
 
@@ -89,4 +94,4 @@
 // limit the number of characters in the item description
 // or set to false for unlimited length.
 // $wgRSSItemMaxLength = false;
-// $wgRSSItemMaxLength = 100;
+$wgRSSItemMaxLength = 200;

Modified: trunk/extensions/RSS/RSSData.php
===================================================================
--- trunk/extensions/RSS/RSSData.php    2012-02-27 01:07:04 UTC (rev 112464)
+++ trunk/extensions/RSS/RSSData.php    2012-02-27 01:10:57 UTC (rev 112465)
@@ -16,26 +16,13 @@
                }
                $xpath = new DOMXPath( $xml );
        
-               // register namespace as below, and apply a regex to the 
expression
-               // http://de3.php.net/manual/en/domxpath.query.php#103461
-               $namespaceURI = $xml->lookupnamespaceURI( NULL );
+               // namespace-safe method to find all elements
+               $items = $xpath->query( "//*[local-name() = 'item']" ); 
 
-               if ( ( null !== $namespaceURI ) ) {
-                       $defaultNS = "defaultNS";
-                       $xpath->registerNamespace( $defaultNS, $namespaceURI );
-                       $defaultNS = "defaultNS:";
-               } else {
-                       $defaultNS = "";
+               if ( $items->length == 0 ) {
+                       $items = $xpath->query( "//*[local-name() = 'entry']" 
); 
                }
 
-               // is it an RSS feed ?
-               $items = $xpath->query( $this->namespacePrefixedQuery( 
"/rss/channel/item", $defaultNS ) ); 
-
-               if ( $items->length === 0 ) {
-                        // or is it an ATOM feed ?
-                       $items = $xpath->query( $this->namespacePrefixedQuery( 
"/feed/entry", $defaultNS ) );
-               }
-
                if( $items->length !== 0 ) {
                        foreach ( $items as $item ) {
                                $bit = array();
@@ -61,14 +48,6 @@
                }
        }
 
-       protected function namespacePrefixedQuery( $query, $namespace = "" ) {
-               if ( $namespace !== "" ) {
-                       $ret = preg_replace( 
'#(::|/\s*|\A)(?![/@].+?|[a-z\-]+::)#', '$1' . $namespace . '$2', $query );
-               } else {
-                       $ret = $query;
-               }
-               return $ret;
-       }
        /**
         * Return a string that will be used to map RSS elements that
         * contain similar data (e.g. dc:date, date, and pubDate) to the

Modified: trunk/extensions/RSS/RSSHooks.php
===================================================================
--- trunk/extensions/RSS/RSSHooks.php   2012-02-27 01:07:04 UTC (rev 112464)
+++ trunk/extensions/RSS/RSSHooks.php   2012-02-27 01:10:57 UTC (rev 112465)
@@ -21,19 +21,24 @@
         * @param $frame PPFrame parser context
         */
        static function renderRss( $input, $args, $parser, $frame ) {
-               global $wgRSSCacheAge, $wgRSSCacheCompare, $wgRSSNamespaces, 
$wgRSSUrlWhitelist;
+               global $wgRSSCacheAge, $wgRSSCacheCompare, $wgRSSNamespaces, 
+                       $wgRSSUrlWhitelist,$wgRSSAllowedFeeds;
 
                if ( is_array( $wgRSSNamespaces ) && count( $wgRSSNamespaces ) 
) {
                        $ns = $parser->getTitle()->getNamespace();
                        $checkNS = array_flip( $wgRSSNamespaces );
 
                        if( !isset( $checkNS[$ns] ) ) {
-                               return wfMsg( 'rss-ns-permission' );
+                               return RSSUtils::RSSError( 'rss-ns-permission' 
);
                        }
                }
 
                switch ( true ) {
        
+               case ( isset( $wgRSSAllowedFeeds ) ): 
+                       return RSSUtils::RSSError( 
'rss-deprecated-wgrssallowedfeeds-found' );
+                       break;
+
                # disallow because there is no whitelist or empty whitelist
                case ( !isset( $wgRSSUrlWhitelist ) 
                        || !is_array( $wgRSSUrlWhitelist )
@@ -59,7 +64,7 @@
                }
                
                if ( !Http::isValidURI( $input ) ) {
-                       return wfMsg( 'rss-invalid-url', htmlspecialchars( 
$input ) );
+                       return RSSutils::RSSError( 'rss-invalid-url', 
htmlspecialchars( $input ) );
                }
                if ( $wgRSSCacheCompare ) {
                        $timeout = $wgRSSCacheCompare;
@@ -79,7 +84,7 @@
                }
 
                if ( !is_object( $rss->rss ) || !is_array( $rss->rss->items ) ) 
{
-                       return wfMsg( 'rss-empty', htmlspecialchars( $input ) );
+                       return RSSUtils::RSSError( 'rss-empty', 
htmlspecialchars( $input ) );
                }
 
                return $rss->renderFeed( $parser, $frame );

Modified: trunk/extensions/RSS/RSSParser.php
===================================================================
--- trunk/extensions/RSS/RSSParser.php  2012-02-27 01:07:04 UTC (rev 112464)
+++ trunk/extensions/RSS/RSSParser.php  2012-02-27 01:10:57 UTC (rev 112465)
@@ -218,7 +218,8 @@
         * @return Status object
         */
        protected function fetchRemote( $key, array $headers = array()) {
-               global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy;
+               global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
+                       $wgRSSUrlNumberOfAllowedRedirects;
 
                if ( $this->etag ) {
                        wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
@@ -244,16 +245,54 @@
                 */
 
                $url = $this->url;
-               $noProxy = false;
+               $noProxy = !isset( $wgRSSProxy );
                
                // Example for disabling proxy use for certain urls
                // $noProxy = preg_match( '!\.internal\.example\.com$!i', 
parse_url( $url, PHP_URL_HOST ) );
-               
+
+       /**
+        * Copied from HttpFunctions.php
+        * Perform an HTTP request
+        *
+        * @param $method String: HTTP method. Usually GET/POST
+        * @param $url String: full URL to act on. If protocol-relative, will 
be expanded to an http:// URL
+        * @param $options Array: options to pass to MWHttpRequest object.
+        *      Possible keys for the array:
+        *    - timeout             Timeout length in seconds
+        *    - postData            An array of key-value pairs or a 
url-encoded form data
+        *    - proxy               The proxy to use.
+        *                          Otherwise it will use $wgHTTPProxy (if set)
+        *                          Otherwise it will use the environment 
variable "http_proxy" (if set)
+        *    - noProxy             Don't use any proxy at all. Takes 
precedence over proxy value(s).
+        *    - sslVerifyHost       (curl only) Verify hostname against 
certificate
+        *    - sslVerifyCert       (curl only) Verify SSL certificate
+        *    - caInfo              (curl only) Provide CA information
+        *    - maxRedirects        Maximum number of redirects to follow 
(defaults to 5)
+        *    - followRedirects     Whether to follow redirects (defaults to 
false).
+        *                                  Note: this should only be used when 
the target URL is trusted,
+        *                                  to avoid attacks on intranet 
services accessible by HTTP.
+        *    - userAgent           A user agent, if you want to override the 
default
+        *                          MediaWiki/$wgVersion
+        * @return Mixed: (bool)false on failure or a string on success
+        */
+
+               if ( isset( $wgRSSUrlNumberOfAllowedRedirects ) 
+                       && is_numeric( $wgRSSUrlNumberOfAllowedRedirects ) ) {
+                       $maxRedirects = $wgRSSUrlNumberOfAllowedRedirects;
+               } else {
+                       $maxRedirects = 0;
+               }
+
+               // we set followRedirects intentionally to true to see error 
messages
+               // in cases where the maximum number of redirects is reached
                $client = HttpRequest::factory( $url,
                        array( 
-                               'timeout' => $wgRSSFetchTimeout,
-                               'proxy'   => $wgRSSProxy,
-                               'noProxy' => $noProxy,
+                               'timeout'         => $wgRSSFetchTimeout,
+                               'followRedirects' => true,
+                               'maxRedirects'    => $maxRedirects,
+                               'proxy'           => $wgRSSProxy,
+                               'noProxy'         => $noProxy,
+                               'userAgent'       => $wgRSSUserAgent,
                        ) 
                );
 
@@ -506,8 +545,8 @@
         *
         * @param $text String: the text to examine
         * @param $filterType String: "filterOut" to check for matches in the
-        *                                                              
filterOut member list.
-        *                                                              
Otherwise, uses the filter member list.
+        *   filterOut member list.
+        *   Otherwise, uses the filter member list.
         * @return Boolean: decision to filter or not.
         */
        protected function filter( $text, $filterType ) {
@@ -591,7 +630,7 @@
        * @param String|Array $param Error parameter (or parameters)
        * @return String Html that is the error.
        */
-       public static function RSSError( $errorMessageName, $param ) {
+       public static function RSSError( $errorMessageName, $param = false ) {
 
                // Anything from a parser tag should use Content lang for 
message,
                // since the cache doesn't vary by user language: do not use 
wfMsgForContent but wfMsgForContent


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to