jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/388365 )
Change subject: Remove nbsp and similar characters from section IDs
......................................................................
Remove nbsp and similar characters from section IDs
Bug: T90902
Change-Id: I71bdb7dd43c3e532287290e3c691d9739da45475
---
M RELEASE-NOTES-1.31
M includes/parser/Parser.php
M tests/parser/parserTests.txt
3 files changed, 39 insertions(+), 0 deletions(-)
Approvals:
C. Scott Ananian: Looks good to me, but someone else must approve
jenkins-bot: Verified
Kaldari: Looks good to me, approved
diff --git a/RELEASE-NOTES-1.31 b/RELEASE-NOTES-1.31
index 4bfcfcb..3688163 100644
--- a/RELEASE-NOTES-1.31
+++ b/RELEASE-NOTES-1.31
@@ -41,6 +41,7 @@
* …
=== Bug fixes in 1.31 ===
+* (T90902) Non-breaking space in header ID breaks anchor
* …
=== Action API changes in 1.31 ===
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index f2e47dc..3548da9 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -4206,6 +4206,9 @@
# Decode HTML entities
$safeHeadline = Sanitizer::decodeCharReferences(
$safeHeadline );
+
+ $safeHeadline = $this->normalizeSectionName(
$safeHeadline );
+
$fallbackHeadline = Sanitizer::escapeIdForAttribute(
$safeHeadline, Sanitizer::ID_FALLBACK );
$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline
);
$safeHeadline = Sanitizer::escapeIdForAttribute(
$safeHeadline, Sanitizer::ID_PRIMARY );
@@ -5767,6 +5770,8 @@
$text = $this->stripSectionName( $text );
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
$text = Sanitizer::decodeCharReferences( $text );
+ $text = $this->normalizeSectionName( $text );
+
return '#' . Sanitizer::escapeIdForLink( $text );
}
@@ -5786,6 +5791,7 @@
$text = $this->stripSectionName( $text );
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
$text = Sanitizer::decodeCharReferences( $text );
+ $text = $this->normalizeSectionName( $text );
if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] ===
'legacy' ) {
// ForAttribute() and ForLink() are the same for legacy encoding
@@ -5798,6 +5804,24 @@
}
/**
+ * Apply the same normalization as code making links to this section would
+ *
+ * @param string $text
+ * @return string
+ */
+ private function normalizeSectionName( $text ) {
+ # T90902: ensure the same normalization is applied for IDs as to links
+ $titleParser =
MediaWikiServices::getInstance()->getTitleParser();
+ try {
+
+ $parts = $titleParser->splitTitleString( "#$text" );
+ } catch ( MalformedTitleException $ex ) {
+ return $text;
+ }
+ return $parts['fragment'];
+ }
+
+ /**
* Strips a text string of wikitext for use in a section anchor
*
* Accepts a text string and then removes all wikitext from the
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 3c861ea..1204dbd 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -29536,3 +29536,17 @@
</p><p><a href="#啤酒">#啤酒</a> <a href="#啤酒">#啤酒</a>
</p>
!! end
+
+!! test
+T90902: Normalize weird characters in section IDs
+!! config
+wgFragmentMode=[ 'html5', 'legacy' ]
+!! wikitext
+== Foo bar ==
+[[#Foo bar]]
+
+!! html/php
+<h2><span class="mw-headline" id="Foo_bar">Foo bar</span><span
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a
href="/index.php?title=Parser_test&action=edit&section=1" title="Edit
section: Foo bar">edit</a><span
class="mw-editsection-bracket">]</span></span></h2>
+<p><a href="#Foo_bar">#Foo bar</a>
+</p>
+!! end
--
To view, visit https://gerrit.wikimedia.org/r/388365
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I71bdb7dd43c3e532287290e3c691d9739da45475
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: MaxSem <[email protected]>
Gerrit-Reviewer: C. Scott Ananian <[email protected]>
Gerrit-Reviewer: Jackmcbarn <[email protected]>
Gerrit-Reviewer: Kaldari <[email protected]>
Gerrit-Reviewer: MaxSem <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits