Daniel Friesen has uploaded a new change for review.
https://gerrit.wikimedia.org/r/67605
Change subject: Add a Html::escape method.
......................................................................
Add a Html::escape method.
Our Html::element method has it's own nice way of escaping html instead
of using the excessive htmlspecialchars. Moving this escaping into a separate
method should allow for that same escaping in cases where you have to use
Html::rawElement instead of element. Such as:
Html::rawElement( 'div', null,
Html::escape( $text ) .
Html::element( 'span, null, $text2 )
);
While we're at it replace strtr with str_replace. strtr is really inefficient.
Change-Id: I87a5cff62a6270ce53362acfb1ad8007cfa862e1
---
M includes/Html.php
M tests/phpunit/includes/HtmlTest.php
2 files changed, 65 insertions(+), 8 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/05/67605/1
diff --git a/includes/Html.php b/includes/Html.php
index a8432f8..df63481 100644
--- a/includes/Html.php
+++ b/includes/Html.php
@@ -102,6 +102,29 @@
);
/**
+ * Escape text for inclusion into chunks of html NOT attributes.
+ * This method is less strict than htmlspecialchars and doesn't escape
+ * things that don't need to be escaped in a html body.
+ *
+ * @param string $text The text to escape.
+ * @return string Escaped text for inclusion into html.
+ */
+ public static function escape( $text ) {
+ static $map = array(
+ // There's no point in escaping quotes, >, etc. in the
contents
+ // of elements.
+ '&' => '&',
+ '<' => '<',
+ );
+ static $keys, $values;
+ if ( is_null( $keys ) ) {
+ $keys = array_keys( $map );
+ $values = array_values( $map );
+ }
+ return str_replace( $keys, $values, $text );
+ }
+
+ /**
* Returns an HTML element in a string. The major advantage here over
* manually typing out the HTML is that it will escape all attribute
* values. If you're hardcoding all the attributes, or there are none,
you
@@ -146,12 +169,7 @@
* @return string
*/
public static function element( $element, $attribs = array(), $contents
= '' ) {
- return self::rawElement( $element, $attribs, strtr( $contents,
array(
- // There's no point in escaping quotes, >, etc. in the
contents of
- // elements.
- '&' => '&',
- '<' => '<'
- ) ) );
+ return self::rawElement( $element, $attribs, Html::escape(
$contents ) );
}
/**
@@ -529,7 +547,7 @@
}
} else {
// Apparently we need to entity-encode \n, \r,
\t, although the
- // spec doesn't mention that. Since we're
doing strtr() anyway,
+ // spec doesn't mention that. Since we're
doing a replace anyway,
// and we don't need <> escaped here, we may as
well not call
// htmlspecialchars().
// @todo FIXME: Verify that we actually need to
@@ -551,7 +569,7 @@
// @todo FIXME: Is this really true?
$map['<'] = '<';
}
- $ret .= " $key=$quote" . strtr( $value, $map )
. $quote;
+ $ret .= " $key=$quote" . str_replace(
array_keys( $map ), array_values( $map ), $value ) . "$quote";
}
}
return $ret;
diff --git a/tests/phpunit/includes/HtmlTest.php
b/tests/phpunit/includes/HtmlTest.php
index ecfe418..7e92e75 100644
--- a/tests/phpunit/includes/HtmlTest.php
+++ b/tests/phpunit/includes/HtmlTest.php
@@ -41,6 +41,33 @@
) );
}
+ /**
+ * Test the Html::escape method to ensure it does not leave
+ * dangerous html alone and round trips correctly.
+ */
+ public function testEscape() {
+ if ( !class_exists( 'DOMDocument' ) ) {
+ $this->markTestSkipped( 'DOMDocument is required.' );
+ return;
+ }
+
+ $text = "Foo<script>Bar";
+ $escaped = Html::escape( $text );
+
+ // Test the escaped text by creating a real DOM out of if and
+ // then checking to ensure that reading the plaintxt from the
+ // node the escaped text is inside round trips back to the
+ // original unescaped text.
+ $doc = new DOMDocument;
+ @$doc->loadHTML( "<div>$escaped</div>" );
+ $xpath = new DOMXPath( $doc );
+
+ $this->assertEquals(
+ $text,
+ $xpath->evaluate( 'string(//div/text())' )
+ );
+ }
+
public function testElementBasics() {
$this->assertEquals(
'<img>',
@@ -60,6 +87,18 @@
'Close tag for empty element (array, string)'
);
+ $this->assertEquals(
+ '<element><script>&</element>',
+ Html::element( 'element', array(), '<script>&' ),
+ 'Html escaped contents for element'
+ );
+
+ $this->assertEquals(
+ '<element attr="value<>" 	"></element>',
+ Html::element( 'element', array( 'attr' =>
"value<>\"\n\t"), '' ),
+ 'escaped attributes for element'
+ );
+
$this->setMwGlobals( 'wgWellFormedXml', true );
$this->assertEquals(
--
To view, visit https://gerrit.wikimedia.org/r/67605
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I87a5cff62a6270ce53362acfb1ad8007cfa862e1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Daniel Friesen <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits