https://www.mediawiki.org/wiki/Special:Code/MediaWiki/111891

Revision: 111891
Author:   dantman
Date:     2012-02-19 21:43:37 +0000 (Sun, 19 Feb 2012)
Log Message:
-----------
Handle one part of bug 32545 while improving MediaWiki's support for Microdata 
in content by adding support for the <data>, <time>, <meta>, and <link> 
elements. The latter two are only permitted when Microdata is enabled, and for 
security are only allowed to be actual elements when they have a strict set of 
attributes set.

Modified Paths:
--------------
    trunk/phase3/RELEASE-NOTES-1.20
    trunk/phase3/includes/Sanitizer.php
    trunk/phase3/tests/parser/parserTests.txt

Modified: trunk/phase3/RELEASE-NOTES-1.20
===================================================================
--- trunk/phase3/RELEASE-NOTES-1.20     2012-02-19 21:07:55 UTC (rev 111890)
+++ trunk/phase3/RELEASE-NOTES-1.20     2012-02-19 21:43:37 UTC (rev 111891)
@@ -22,6 +22,8 @@
 * (bug 34475) Add support for IP/CIDR notation to tablesorter
 * (bug 27619) Remove preference option to display broken links as link?
 * (bug 15404) Add support for sorting fractions in jquery.tablesorter
+* The <data>, <time>, <meta>, and <link> elements are allowed within WikiText 
for use
+  with Microdata.
 
 === Bug fixes in 1.20 ===
 * (bug 30245) Use the correct way to construct a log page title.

Modified: trunk/phase3/includes/Sanitizer.php
===================================================================
--- trunk/phase3/includes/Sanitizer.php 2012-02-19 21:07:55 UTC (rev 111890)
+++ trunk/phase3/includes/Sanitizer.php 2012-02-19 21:43:37 UTC (rev 111891)
@@ -364,7 +364,7 @@
         * @return string
         */
        static function removeHTMLtags( $text, $processCallback = null, $args = 
array(), $extratags = array(), $removetags = array() ) {
-               global $wgUseTidy;
+               global $wgUseTidy, $wgHtml5, $wgAllowMicrodataAttributes;
 
                static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, 
$htmlnest, $tabletags,
                        $htmllist, $listtags, $htmlsingleallowed, 
$htmlelementsStatic, $staticInitialised;
@@ -381,12 +381,19 @@
                                'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 
'abbr', 'dfn',
                                'kbd', 'samp'
                        );
+                       if ( $wgHtml5 ) {
+                               $htmlpairsStatic = array_merge( 
$htmlpairsStatic, array( 'data', 'time' ) );
+                       }
                        $htmlsingle = array(
                                'br', 'hr', 'li', 'dt', 'dd'
                        );
                        $htmlsingleonly = array( # Elements that cannot have 
close tags
                                'br', 'hr'
                        );
+                       if ( $wgHtml5 && $wgAllowMicrodataAttributes ) {
+                               $htmlsingle[] = $htmlsingleonly[] = 'meta';
+                               $htmlsingle[] = $htmlsingleonly[] = 'link';
+                       }
                        $htmlnest = array( # Tags that can be nested--??
                                'table', 'tr', 'td', 'th', 'div', 'blockquote', 
'ol', 'ul',
                                'dl', 'font', 'big', 'small', 'sub', 'sup', 
'span'
@@ -528,6 +535,10 @@
                                                        call_user_func_array( 
$processCallback, array( &$params, $args ) );
                                                }
 
+                                               if ( !Sanitizer::validateTag( 
$params, $t ) ) {
+                                                       $badtag = true;
+                                               }
+
                                                # Strip non-approved attributes 
from the tag
                                                $newparams = 
Sanitizer::fixTagAttributes( $params, $t );
                                        }
@@ -709,6 +720,37 @@
        }
 
        /**
+        * Takes attribute names and values for a tag and the tag name and
+        * validates that the tag is allowed to be present.
+        * This DOES NOT validate the attributes, nor does it validate the
+        * tags themselves. This method only handles the special circumstances
+        * where we may want to allow a tag within content but ONLY when it has
+        * specific attributes set.
+        *
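+        * @param $params string Raw attribute text of the tag, as accepted by Sanitizer::decodeTagAttributes()
+        * @param $element string Name of the tag being checked
+        * @return bool False if the tag must not be output as an actual element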
+        */
+       static function validateTag( $params, $element ) {
+               $params = Sanitizer::decodeTagAttributes( $params );
+               
+               if ( $element == 'meta' || $element == 'link' ) {
+                       if ( !isset( $params['itemprop'] ) ) {
+                               // <meta> and <link> must have an itemprop="" 
otherwise they are not valid or safe in content
+                               return false;
+                       }
+                       if ( $element == 'meta' && !isset( $params['content'] ) 
) {
+                               // <meta> must have a content="" for the 
itemprop
+                               return false;
+                       }
+                       if ( $element == 'link' && !isset( $params['href'] ) ) {
+                               // <link> must have an associated href=""
+                               return false;
+                       }
+               }
+
+               return true;
+       }
+
+       /**
         * Take an array of attribute names and values and normalize or discard
         * illegal values for the given element type.
         *
@@ -809,7 +851,7 @@
                                unset( $out['itemid'] );
                                unset( $out['itemref'] );
                        }
-                       # TODO: Strip itemprop if we aren't descendants of an 
itemscope.
+                       # TODO: Strip itemprop if we aren't descendants of an 
itemscope or pointed to by an itemref.
                }
                return $out;
        }
@@ -1483,7 +1525,7 @@
 
                # Numbers refer to sections in HTML 4.01 standard describing 
the element.
                # See: http://www.w3.org/TR/html4/
-               $whitelist = array (
+               $whitelist = array(
                        # 7.5.4
                        'div'        => $block,
                        'center'     => $common, # deprecated
@@ -1611,7 +1653,24 @@
                        # 'title' may not be 100% valid here; it's XHTML
                        # http://www.w3.org/TR/REC-MathML/
                        'math'       => array( 'class', 'style', 'id', 'title' 
),
+               );
+               
+               if ( $wgHtml5 ) {
+                       # HTML5 elements, defined by:
+                       # 
http://www.whatwg.org/specs/web-apps/current-work/multipage/
+                       $whitelist += array(
+                               'data' => array_merge( $common, array( 'value' 
) ),
+                               'time' => array_merge( $common, array( 
'datetime' ) ),
+
+                               // meta and link are only present when 
Microdata is allowed anyways
+                               // so we don't bother adding another condition 
here
+                               // meta and link are only valid for use as 
Microdata so we do not
+                               // allow the common attributes here.
+                               'meta' => array( 'itemprop', 'content' ),
+                               'link' => array( 'itemprop', 'href' ),
                        );
+               }
+
                return $whitelist;
        }
 

Modified: trunk/phase3/tests/parser/parserTests.txt
===================================================================
--- trunk/phase3/tests/parser/parserTests.txt   2012-02-19 21:07:55 UTC (rev 
111890)
+++ trunk/phase3/tests/parser/parserTests.txt   2012-02-19 21:43:37 UTC (rev 
111891)
@@ -5420,6 +5420,26 @@
 !! end
 
 !! test
+Sanitizer: Validating that <meta> and <link> work, but only for Microdata
+!! input
+<div itemscope>
+       <meta itemprop="hello" content="world">
+       <meta http-equiv="refresh" content="5">
+       <link itemprop="hello" href="{{SERVER}}">
+       <link rel="stylesheet" href="{{SERVER}}">
+</div>
+!! result
+<div itemscope="itemscope">
+<p>    <meta itemprop="hello" content="world" />
+       &lt;meta http-equiv="refresh" content="5"&gt;
+</p>
+       <link itemprop="hello" href="http&#58;//Britney-Spears" />
+       &lt;link rel="stylesheet" href="<a rel="nofollow" class="external free" 
href="http://Britney-Spears">http://Britney-Spears</a>"&gt;
+</div>
+
+!! end
+
+!! test
 Language converter: output gets cut off unexpectedly (bug 5757)
 !! options
 language=zh


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to