Daniel Friesen has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/67605


Change subject: Add a Html::escape method.
......................................................................

Add a Html::escape method.

Our Html::element method has it's own nice way of escaping html instead
of using the excessive htmlspecialchars. Moving this escaping into a separate
method should allow for that same escaping in cases where you have to use
Html::rawElement instead of element. Such as:
Html::rawElement( 'div', null,
  Html::escape( $text ) .
  Html::element( 'span, null, $text2 )
);

While we're at it replace strtr with str_replace. strtr is really inefficient.

Change-Id: I87a5cff62a6270ce53362acfb1ad8007cfa862e1
---
M includes/Html.php
M tests/phpunit/includes/HtmlTest.php
2 files changed, 65 insertions(+), 8 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/05/67605/1

diff --git a/includes/Html.php b/includes/Html.php
index a8432f8..df63481 100644
--- a/includes/Html.php
+++ b/includes/Html.php
@@ -102,6 +102,29 @@
        );
 
        /**
+        * Escape text for inclusion into chunks of html NOT attributes.
+        * This method is less strict than htmlspecialchars and doesn't escape
+        * things that don't need to be escaped in a html body.
+        *
+        * @param string $text The text to escape.
+        * @return string Escaped text for inclusion into html.
+        */
+       public static function escape( $text ) {
+               static $map = array(
+                       // There's no point in escaping quotes, >, etc. in the 
contents
+                       // of elements.
+                       '&' => '&',
+                       '<' => '&lt;',
+               );
+               static $keys, $values;
+               if ( is_null( $keys ) ) {
+                       $keys = array_keys( $map );
+                       $values = array_values( $map );
+               }
+               return str_replace( $keys, $values, $text );
+       }
+
+       /**
         * Returns an HTML element in a string.  The major advantage here over
         * manually typing out the HTML is that it will escape all attribute
         * values.  If you're hardcoding all the attributes, or there are none, 
you
@@ -146,12 +169,7 @@
         * @return string
         */
        public static function element( $element, $attribs = array(), $contents 
= '' ) {
-               return self::rawElement( $element, $attribs, strtr( $contents, 
array(
-                       // There's no point in escaping quotes, >, etc. in the 
contents of
-                       // elements.
-                       '&' => '&amp;',
-                       '<' => '&lt;'
-               ) ) );
+               return self::rawElement( $element, $attribs, Html::escape( 
$contents ) );
        }
 
        /**
@@ -529,7 +547,7 @@
                                }
                        } else {
                                // Apparently we need to entity-encode \n, \r, 
\t, although the
-                               // spec doesn't mention that.  Since we're 
doing strtr() anyway,
+                               // spec doesn't mention that.  Since we're 
doing a replace anyway,
                                // and we don't need <> escaped here, we may as 
well not call
                                // htmlspecialchars().
                                // @todo FIXME: Verify that we actually need to
@@ -551,7 +569,7 @@
                                        // @todo FIXME: Is this really true?
                                        $map['<'] = '&lt;';
                                }
-                               $ret .= " $key=$quote" . strtr( $value, $map ) 
. $quote;
+                               $ret .= " $key=$quote" . str_replace( 
array_keys( $map ), array_values( $map ), $value ) . "$quote";
                        }
                }
                return $ret;
diff --git a/tests/phpunit/includes/HtmlTest.php 
b/tests/phpunit/includes/HtmlTest.php
index ecfe418..7e92e75 100644
--- a/tests/phpunit/includes/HtmlTest.php
+++ b/tests/phpunit/includes/HtmlTest.php
@@ -41,6 +41,33 @@
                ) );
        }
 
+       /**
+        * Test the Html::escape method to ensure it does not leave
+        * dangerous html alone and round trips correctly.
+        */
+       public function testEscape() {
+               if ( !class_exists( 'DOMDocument' ) ) {
+                       $this->markTestSkipped( 'DOMDocument is required.' );
+                       return;
+               }
+
+               $text = "Foo<script>Bar";
+               $escaped = Html::escape( $text );
+
+               // Test the escaped text by creating a real DOM out of if and
+               // then checking to ensure that reading the plaintxt from the
+               // node the escaped text is inside round trips back to the
+               // original unescaped text.
+               $doc = new DOMDocument;
+               @$doc->loadHTML( "<div>$escaped</div>" );
+               $xpath = new DOMXPath( $doc );
+
+               $this->assertEquals(
+                       $text,
+                       $xpath->evaluate( 'string(//div/text())' )
+               );
+       }
+
        public function testElementBasics() {
                $this->assertEquals(
                        '<img>',
@@ -60,6 +87,18 @@
                        'Close tag for empty element (array, string)'
                );
 
+               $this->assertEquals(
+                       '<element>&lt;script>&amp;</element>',
+                       Html::element( 'element', array(), '<script>&' ),
+                       'Html escaped contents for element'
+               );
+
+               $this->assertEquals(
+                       '<element attr="value<>&quot;&#10;&#9;"></element>',
+                       Html::element( 'element', array( 'attr' => 
"value<>\"\n\t"), '' ),
+                       'escaped attributes for element'
+               );
+
                $this->setMwGlobals( 'wgWellFormedXml', true );
 
                $this->assertEquals(

-- 
To view, visit https://gerrit.wikimedia.org/r/67605
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I87a5cff62a6270ce53362acfb1ad8007cfa862e1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Daniel Friesen <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to