Putnik has uploaded a new change for review.
https://gerrit.wikimedia.org/r/282640
Change subject: The last sentence of the paragraph was lost.
......................................................................
The last sentence of the paragraph was lost.
Change-Id: I963ca71b73dc7396156e8b5fcf5d2952e4abbc05
---
M includes/ExtractFormatter.php
M tests/ExtractFormatterTest.php
2 files changed, 6 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/TextExtracts refs/changes/40/282640/1
diff --git a/includes/ExtractFormatter.php b/includes/ExtractFormatter.php
index e18952e..a2197bf 100644
--- a/includes/ExtractFormatter.php
+++ b/includes/ExtractFormatter.php
@@ -80,7 +80,7 @@
public static function getFirstSentences( $text, $requestedSentenceCount ) {
// Based on code from OpenSearchXml by Brion Vibber
$endchars = array(
- '[^\p{Lu}]\.[ \n]', '\![ \n]', '\?[ \n]', // regular ASCII
+ '[^\p{Lu}]\.(?:[ \n]|$)', '[\!\?](?:[ \n]|$)', // regular ASCII
'。', // full-width ideographic full-stop
'．', '！', '？', // double-width roman forms
'｡', // half-width ideographic full stop
diff --git a/tests/ExtractFormatterTest.php b/tests/ExtractFormatterTest.php
index a65bb92..8759921 100644
--- a/tests/ExtractFormatterTest.php
+++ b/tests/ExtractFormatterTest.php
@@ -77,6 +77,11 @@
'Foo is a bar.',
),
array(
+ 'Foo is a bar. Such a smart boy.',
+ 2,
+ 'Foo is a bar. Such a smart boy.',
+ ),
+ array(
'Foo is a bar.',
1,
'Foo is a bar.',
--
To view, visit https://gerrit.wikimedia.org/r/282640
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I963ca71b73dc7396156e8b5fcf5d2952e4abbc05
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/TextExtracts
Gerrit-Branch: master
Gerrit-Owner: Putnik <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits