[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Hygiene: remove extracts.js

BearND (Code Review) Wed, 14 Dec 2016 15:43:21 -0800

BearND has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/327396 )


Change subject: Hygiene: remove extracts.js
......................................................................

Hygiene: remove extracts.js

Follow-up to Idc70d6e989a67421ee5af1b0d2350cfc06a3da94.
lib/extract.js should have been removed as part of that change.

Change-Id: I35ee543be4c9ae6d14857fefc1ef42b64f18fa34
---
D lib/extract.js
1 file changed, 0 insertions(+), 112 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/96/327396/1

diff --git a/lib/extract.js b/lib/extract.js
deleted file mode 100644
index 9142f97..0000000
--- a/lib/extract.js
+++ /dev/null
@@ -1,112 +0,0 @@
-'use strict';
-
-/**
- Article extracts
- */
-
-/**
- * @param {string} [str]
- * @return {string} str, less parenthetical expressions and their leading whitespace, if balanced.
- */
-function removeParens(str) {
-    function count(paren) {
-        return ((str || '').match(new RegExp(`\\${paren}`, 'g')) || []).length;
-    }
-
-    const openCount = count('(');
-    const closeCount = count(')');
-    const regex = /\s*\([^()]*\)/g;
-    return openCount && openCount === closeCount ? 
removeParens(str.replace(regex, '')) : str;
-}
-
-/**
- * Find all matches of regex in text, calling callback with each match object
- *
- * TODO: remove when switching to Parsoid. Copied from:
- * https://github.com/wikimedia/mediawiki-services-cxserver/blob/0d21a808f7ab6b82086171af927467c1b9460626/lineardoc/Utils.js
- *
- * @param {string} text The text to search
- * @param {Regex} regex The regex to search; should be created for this function call
- * @param {Function} callback Function to call with each match
- * @return {Array} The return values from the callback
- */
-function findAll(text, regex, callback) {
-    const boundaries = [];
-    do {
-        const match = regex.exec(text);
-        if (match === null) {
-            break;
-        }
-        const boundary = callback(text, match);
-        if (boundary !== null) {
-            boundaries.push(boundary);
-        }
-    } while (regex.test(text));
-    return boundaries;
-}
-
-/**
- * Test a possible English sentence boundary match
- *
- * TODO: remove when switching to Parsoid. Copied from:
- * https://github.com/wikimedia/mediawiki-services-cxserver/blob/0d21a808f7ab6b82086171af927467c1b9460626/segmentation/languages/SegmenterDefault.js
- *
- * @param {string} text The plaintext to segment
- * @param {Object} match The possible boundary match (returned by regex.exec)
- * @return {number|null} The boundary offset, or null if not a sentence boundary
- */
-function findBoundary(text, match) {
-    const tail = text.slice(match.index + 1, text.length);
-    const head = text.slice(0, match.index);
-
-    // Trailing non-final punctuation: not a sentence boundary
-    if (tail.match(/^[,;:]/)) {
-        return null;
-    }
-    // Next word character is number or lower-case: not a sentence boundary
-    if (tail.match(/^\W*[0-9a-z]/)) {
-        return null;
-    }
-
-    // Do not break in abbreviations. Example D. John, St. Peter
-    const lastWord = head.match(/(\w*)$/)[0];
-    // Exclude at most 2 letter abbreviations. Examples: T. Dr. St. Jr. Sr. Ms. Mr.
-    // But not all caps like "UK." as in  "UK. Not US",
-    if (lastWord.length <= 2 && lastWord.match(/^\W*[A-Z][a-z]?$/) && 
tail.match(/^\W*[A-Z]/)) {
-        return null;
-    }
-
-    // Include any closing punctuation and trailing space
-    return match.index + 1 + tail.match(/^['”"’]*\s*/)[0].length;
-}
-
-/**
- * Find English sentence boundaries
- *
- * TODO: remove when switching to Parsoid. Copied from:
- * https://github.com/wikimedia/mediawiki-services-cxserver/blob/0d21a808f7ab6b82086171af927467c1b9460626/segmentation/languages/SegmenterDefault.js
- *
- * @param {string} text The plaintext to segment
- * @returns {number[]} Sentence boundary offsets
- */
-function getBoundaries(text) {
-    // Regex to find possible English sentence boundaries.
-    // Must not use a shared regex instance (re.lastIndex is used)
-    return findAll(text, /[.!?]/g, findBoundary);
-}
-
-function format(extract) {
-    const MAX_SENTENCES = 2;
-    const cleanStr = removeParens(extract.replace(/\s+/g, ' '));
-    const boundaries = getBoundaries(cleanStr);
-    const cleanStrEndIndex = boundaries[Math.min(boundaries.length, 
MAX_SENTENCES - 1)];
-
-    const ret = cleanStr.slice(0, cleanStrEndIndex).trim();
-    if (ret !== '…' && ret !== '..') {
-        return ret;
-    }
-}
-
-module.exports = {
-    format
-};

-- 
To view, visit https://gerrit.wikimedia.org/r/327396
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I35ee543be4c9ae6d14857fefc1ef42b64f18fa34
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: BearND <bsitzm...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] mediawiki...mobileapps[master]: Hygiene: remove extracts.js

Reply via email to