[MediaWiki-commits] [Gerrit] mediawiki...citoid[master]: Remove TZ offsets & add ordinal indicators
jenkins-bot has submitted this change and it was merged. Change subject: Remove TZ offsets & add ordinal indicators .. Remove TZ offsets & add ordinal indicators * Add chrono-node library which has slightly better parser than Date.parse(). * This adds support for ordinal indicators * This unfortunately removes support for ordinal numbers (less commonly used). * Remove offsets from all dates containing TZ info; this is because TZ info can shift the day to the day before or after, which is confusing from the user-end, see bug. * Fixes to two scraper tests where the metadata changed upstream Bug: T145052 Change-Id: I44134fca827ed5c387f6f56b6670f25f6becb97c --- M lib/Exporter.js M package.json M test/features/scraping/index.js M test/features/scraping/lang.js M test/features/unit/exporter.js 5 files changed, 71 insertions(+), 11 deletions(-) Approvals: Mobrovac: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/Exporter.js b/lib/Exporter.js index 3cc9007..8d57a3a 100644 --- a/lib/Exporter.js +++ b/lib/Exporter.js @@ -8,6 +8,7 @@ /* Import Modules */ var BBPromise = require('bluebird'); +var chrono = require('chrono-node'); var crypto = require('crypto'); var extend = require('extend'); var stripTags = require('striptags'); @@ -508,11 +509,26 @@ */ function fixDate(citation){ if (citation.date){ -// Explicitly set TZ to GMT for servers not running in GMT -var d = new Date(citation.date + (/[0-9]T[0-9]/.test(citation.date) ? 
'' : ' GMT+00')); -if (isFinite(d)) { -citation.date = d.toISOString().split('T').shift(); -} else { +try { +var d; +// Try to parse with chrono first +var p = chrono.parse(citation.date); // Create ParsedResult object with chrono +if (p && p[0] && p[0].start){ +p[0].start.assign('timezoneOffset', 0); // Remove timezone offset so that the user-observed date doesn't change based on offset +d = p[0].start.date(); // Create a Date object from ParsedComponents Object +} else { +// Try to parse with Date.parse() as fallback; chrono doesn't seem to work with ambigious dates, such as '2010' +d = new Date(citation.date + (/[0-9]T[0-9]/.test(citation.date) ? '' : ' GMT+00')); // Explicitly set to GMT time to avoid offset issue +} + +// Lastly, remove time from date +if (isFinite(d)) { +citation.date = d.toISOString().split('T').shift(); +} else { +// If no finite translation of the date is available, remove the field +delete citation.date; +} +} catch (e) { // Remove field if errors are thrown delete citation.date; } } diff --git a/package.json b/package.json index 9578f4d..b5fa125 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "citoid", - "version": "0.4.2", + "version": "0.4.3", "description": "Converts search terms such as URL or DOI into citations.", "homepage": "https://www.mediawiki.org/wiki/Citoid", "license": "Apache-2.0", @@ -16,6 +16,7 @@ "bunyan": "1.8.1", "cassandra-uuid": "0.0.2", "cheerio": "0.20.0", +"chrono-node": "1.2.4", "compression": "1.6.2", "content-type": "1.0.2", "core-js": "2.4.1", diff --git a/test/features/scraping/index.js b/test/features/scraping/index.js index e8ce812..54d3e1b 100644 --- a/test/features/scraping/index.js +++ b/test/features/scraping/index.js @@ -330,7 +330,7 @@ it('doi in url with query parameters - uses crossRef', function() { return server.query('http://www.example.com/10.1086/378695?uid=3739832&uid=2&uid=4&uid=3739256&sid=21105503736473').then(function(res) { assert.status(res, 200); 
-assert.checkCitation(res, 'Salaries, Turnover, and Performance in the Federal Criminal Justice System*'); +assert.checkCitation(res, 'Salaries, Turnover, and Performance in the Federal Criminal Justice System'); assert.deepEqual(res.body[0].DOI, '10.1086/378695'); assert.deepEqual(res.body[0].author.length, 1); }); diff --git a/test/features/scraping/lang.js b/test/features/scraping/lang.js index dadb952..f07a8cf 100644 --- a/test/features/scraping/lang.js +++ b/test/features/scraping/lang.js @@ -26,7 +26,7 @@ it('german twitter', function() { return server.query('http://twitter.com', 'mediawiki', 'de').then(function(res) { assert.status(res, 200); -assert.checkCitation(res, 'Twitter - Sieh Dir an, was gerade los ist'); +assert.checkCitation(res, 'Twitter. Alles, was gerade los ist.'); assert.deepEqual(!!res.body[0].accessDate, true, 'No accessDate present
[MediaWiki-commits] [Gerrit] mediawiki...citoid[master]: Remove TZ offsets & add ordinal indicators
Mvolz has uploaded a new change for review. https://gerrit.wikimedia.org/r/309599 Change subject: Remove TZ offsets & add ordinal indicators .. Remove TZ offsets & add ordinal indicators * Add chrono-node library which has slightly better parser than Date.parse(). * This adds support for ordinal indicators * This unfortunately removes support for ordinal numbers (less commonly used). * Remove offsets from all dates containing TZ info; this is because TZ info can shift the day to the day before or after, which is confusing from the user-end, see bug. * Fixes to two scraper tests where the metadata changed upstream Bug: T145052 Change-Id: I44134fca827ed5c387f6f56b6670f25f6becb97c --- M lib/Exporter.js M package.json M test/features/scraping/index.js M test/features/scraping/lang.js M test/features/unit/exporter.js 5 files changed, 64 insertions(+), 8 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/citoid refs/changes/99/309599/1 diff --git a/lib/Exporter.js b/lib/Exporter.js index 3cc9007..04251cd 100644 --- a/lib/Exporter.js +++ b/lib/Exporter.js @@ -8,6 +8,7 @@ /* Import Modules */ var BBPromise = require('bluebird'); +var chrono = require('chrono-node'); var crypto = require('crypto'); var extend = require('extend'); var stripTags = require('striptags'); @@ -508,11 +509,22 @@ */ function fixDate(citation){ if (citation.date){ -// Explicitly set TZ to GMT for servers not running in GMT -var d = new Date(citation.date + (/[0-9]T[0-9]/.test(citation.date) ? 
'' : ' GMT+00')); +var d; +// Try to parse with chrono first +var p = chrono.parse(citation.date); // Create ParsedResult object with chrono +if (p && p[0] && p[0].start){ +p[0].start.assign('timezoneOffset', 0); // Remove timezone offset so that the user-observed date doesn't change based on offset +d = p[0].start.date(); // Create a Date object from ParsedComponents Object +} else { +// Try to parse with Date.parse() as fallback; chrono doesn't seem to work with ambigious dates, such as '2010' +d = new Date(citation.date + (/[0-9]T[0-9]/.test(citation.date) ? '' : ' GMT+00')); // Explicitly set to GMT time to avoid offset issue +} + +// Lastly, remove time from date if (isFinite(d)) { citation.date = d.toISOString().split('T').shift(); } else { +// If no finite translation of the date is available, remove the field delete citation.date; } } diff --git a/package.json b/package.json index 9578f4d..b5fa125 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "citoid", - "version": "0.4.2", + "version": "0.4.3", "description": "Converts search terms such as URL or DOI into citations.", "homepage": "https://www.mediawiki.org/wiki/Citoid", "license": "Apache-2.0", @@ -16,6 +16,7 @@ "bunyan": "1.8.1", "cassandra-uuid": "0.0.2", "cheerio": "0.20.0", +"chrono-node": "1.2.4", "compression": "1.6.2", "content-type": "1.0.2", "core-js": "2.4.1", diff --git a/test/features/scraping/index.js b/test/features/scraping/index.js index e8ce812..577d2d6 100644 --- a/test/features/scraping/index.js +++ b/test/features/scraping/index.js @@ -330,7 +330,7 @@ it('doi in url with query parameters - uses crossRef', function() { return server.query('http://www.example.com/10.1086/378695?uid=3739832&uid=2&uid=4&uid=3739256&sid=21105503736473').then(function(res) { assert.status(res, 200); -assert.checkCitation(res, 'Salaries, Turnover, and Performance in the Federal 
Criminal Justice Systems'); assert.deepEqual(res.body[0].DOI, '10.1086/378695'); assert.deepEqual(res.body[0].author.length, 1); }); diff --git a/test/features/scraping/lang.js b/test/features/scraping/lang.js index dadb952..f07a8cf 100644 --- a/test/features/scraping/lang.js +++ b/test/features/scraping/lang.js @@ -26,7 +26,7 @@ it('german twitter', function() { return server.query('http://twitter.com', 'mediawiki', 'de').then(function(res) { assert.status(res, 200); -assert.checkCitation(res, 'Twitter - Sieh Dir an, was gerade los ist'); +assert.checkCitation(res, 'Twitter. Alles, was gerade los ist.'); assert.deepEqual(!!res.body[0].accessDate, true, 'No accessDate present'); }); }); diff --git a/test/features/unit/exporter.js b/test/features/unit/exporter.js index 4899e8d..6b6d0a8 100644 --- a/test/features/unit/exporter.js +++ b/test/features/unit/exporter.js @@ -67,12 +67,55 @@ asser