jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/392990 )
Change subject: Parser: Add guessSectionNameFromStrippedText() and refactor
......................................................................
Parser: Add guessSectionNameFromStrippedText() and refactor
Split up guessSectionNameFromWikiText() into pieces to reduce code
duplication, and provide guessSectionNameFromStrippedText() which
doesn't do link stripping.
Really these should be named guessSection*ANCHOR*From... because they
return an anchor (with encoding and a '#' prefix) instead of a section
name, but I didn't want to rename the existing one.
Also make normalizeSectionName static (it doesn't use $this) so that
guessSectionNameFromStrippedText() can be static as well.
Change-Id: I56b9dda805a51517549c5ed709f4bd747ca04577
---
M includes/parser/Parser.php
1 file changed, 51 insertions(+), 34 deletions(-)
Approvals:
Tim Starling: Looks good to me, approved
jenkins-bot: Verified
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 3548da9..87d76fa 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -4207,7 +4207,7 @@
# Decode HTML entities
$safeHeadline = Sanitizer::decodeCharReferences(
$safeHeadline );
- $safeHeadline = $this->normalizeSectionName(
$safeHeadline );
+ $safeHeadline = self::normalizeSectionName(
$safeHeadline );
$fallbackHeadline = Sanitizer::escapeIdForAttribute(
$safeHeadline, Sanitizer::ID_FALLBACK );
$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline
);
@@ -5756,43 +5756,19 @@
return $this->mDefaultSort;
}
- /**
- * Try to guess the section anchor name based on a wikitext fragment
- * presumably extracted from a heading, for example "Header" from
- * "== Header ==".
- *
- * @param string $text
- *
- * @return string
- */
- public function guessSectionNameFromWikiText( $text ) {
- # Strip out wikitext links(they break the anchor)
- $text = $this->stripSectionName( $text );
+ private static function getSectionNameFromStrippedText( $text ) {
$text = Sanitizer::normalizeSectionNameWhitespace( $text );
$text = Sanitizer::decodeCharReferences( $text );
- $text = $this->normalizeSectionName( $text );
-
- return '#' . Sanitizer::escapeIdForLink( $text );
+ $text = self::normalizeSectionName( $text );
+ return $text;
}
- /**
- * Same as guessSectionNameFromWikiText(), but produces legacy anchors
- * instead, if possible. For use in redirects, since various versions
- * of Microsoft browsers interpret Location: headers as something other
- * than UTF-8, resulting in breakage.
- *
- * @param string $text The section name
- * @return string An anchor
- */
- public function guessLegacySectionNameFromWikiText( $text ) {
+ private static function makeAnchor( $sectionName ) {
+ return '#' . Sanitizer::escapeIdForLink( $sectionName );
+ }
+
+ private static function makeLegacyAnchor( $sectionName ) {
global $wgFragmentMode;
-
- # Strip out wikitext links(they break the anchor)
- $text = $this->stripSectionName( $text );
- $text = Sanitizer::normalizeSectionNameWhitespace( $text );
- $text = Sanitizer::decodeCharReferences( $text );
- $text = $this->normalizeSectionName( $text );
-
if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] ===
'legacy' ) {
// ForAttribute() and ForLink() are the same for legacy encoding
$id = Sanitizer::escapeIdForAttribute( $text,
Sanitizer::ID_FALLBACK );
@@ -5804,12 +5780,53 @@
}
/**
+ * Try to guess the section anchor name based on a wikitext fragment
+ * presumably extracted from a heading, for example "Header" from
+ * "== Header ==".
+ *
+ * @param string $text
+ * @return string Anchor (starting with '#')
+ */
+ public function guessSectionNameFromWikiText( $text ) {
+ # Strip out wikitext links(they break the anchor)
+ $text = $this->stripSectionName( $text );
+ $sectionName = self::getSectionNameFromStrippedText( $text );
+ return self::makeAnchor( $sectionName );
+ }
+
+ /**
+ * Same as guessSectionNameFromWikiText(), but produces legacy anchors
+ * instead, if possible. For use in redirects, since various versions
+ * of Microsoft browsers interpret Location: headers as something other
+ * than UTF-8, resulting in breakage.
+ *
+ * @param string $text The section name
+ * @return string Anchor (starting with '#')
+ */
+ public function guessLegacySectionNameFromWikiText( $text ) {
+ # Strip out wikitext links(they break the anchor)
+ $text = $this->stripSectionName( $text );
+ $sectionName = self::getSectionNameFromStrippedText( $text );
+ return self::makeLegacyAnchor( $sectionName );
+ }
+
+ /**
+ * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
+ * @param string $text Section name (plain text)
+ * @return string Anchor (starting with '#')
+ */
+ public static function guessSectionNameFromStrippedText( $text ) {
+ $sectionName = self::getSectionNameFromStrippedText( $text );
+ return self::makeAnchor( $sectionName );
+ }
+
+ /**
* Apply the same normalization as code making links to this section would
*
* @param string $text
* @return string
*/
- private function normalizeSectionName( $text ) {
+ private static function normalizeSectionName( $text ) {
# T90902: ensure the same normalization is applied for IDs as to links
$titleParser =
MediaWikiServices::getInstance()->getTitleParser();
try {
--
To view, visit https://gerrit.wikimedia.org/r/392990
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I56b9dda805a51517549c5ed709f4bd747ca04577
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Catrope <[email protected]>
Gerrit-Reviewer: C. Scott Ananian <[email protected]>
Gerrit-Reviewer: Catrope <[email protected]>
Gerrit-Reviewer: Florianschmidtwelzow <[email protected]>
Gerrit-Reviewer: Jackmcbarn <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits