jenkins-bot has submitted this change and it was merged.
Change subject: Remove TZ offsets & add ordinal indicators
......................................................................
Remove TZ offsets & add ordinal indicators
* Add chrono-node library, which has a slightly better
parser than Date.parse().
* This adds support for ordinal indicators
* This unfortunately removes support for ordinal
numbers (less commonly used).
* Remove offsets from all dates containing TZ info;
this is because TZ info can shift the day to the day
before or after, which is confusing from the user-end,
see bug.
* Fixes to two scraper tests where the metadata
changed upstream
Bug: T145052
Change-Id: I44134fca827ed5c387f6f56b6670f25f6becb97c
---
M lib/Exporter.js
M package.json
M test/features/scraping/index.js
M test/features/scraping/lang.js
M test/features/unit/exporter.js
5 files changed, 71 insertions(+), 11 deletions(-)
Approvals:
Mobrovac: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/Exporter.js b/lib/Exporter.js
index 3cc9007..8d57a3a 100644
--- a/lib/Exporter.js
+++ b/lib/Exporter.js
@@ -8,6 +8,7 @@
/* Import Modules */
var BBPromise = require('bluebird');
+var chrono = require('chrono-node');
var crypto = require('crypto');
var extend = require('extend');
var stripTags = require('striptags');
@@ -508,11 +509,26 @@
*/
function fixDate(citation){
if (citation.date){
- // Explicitly set TZ to GMT for servers not running in GMT
- var d = new Date(citation.date + (/[0-9]T[0-9]/.test(citation.date) ?
'' : ' GMT+00'));
- if (isFinite(d)) {
- citation.date = d.toISOString().split('T').shift();
- } else {
+ try {
+ var d;
+ // Try to parse with chrono first
+ var p = chrono.parse(citation.date); // Create ParsedResult object
with chrono
+ if (p && p[0] && p[0].start){
+ p[0].start.assign('timezoneOffset', 0); // Remove timezone
offset so that the user-observed date doesn't change based on offset
+ d = p[0].start.date(); // Create a Date object from
ParsedComponents Object
+ } else {
+ // Try to parse with Date.parse() as fallback; chrono doesn't
seem to work with ambigious dates, such as '2010'
+ d = new Date(citation.date +
(/[0-9]T[0-9]/.test(citation.date) ? '' : ' GMT+00')); // Explicitly set to GMT
time to avoid offset issue
+ }
+
+ // Lastly, remove time from date
+ if (isFinite(d)) {
+ citation.date = d.toISOString().split('T').shift();
+ } else {
+ // If no finite translation of the date is available, remove
the field
+ delete citation.date;
+ }
+ } catch (e) { // Remove field if errors are thrown
delete citation.date;
}
}
diff --git a/package.json b/package.json
index 9578f4d..b5fa125 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "citoid",
- "version": "0.4.2",
+ "version": "0.4.3",
"description": "Converts search terms such as URL or DOI into citations.",
"homepage": "https://www.mediawiki.org/wiki/Citoid",
"license": "Apache-2.0",
@@ -16,6 +16,7 @@
"bunyan": "1.8.1",
"cassandra-uuid": "0.0.2",
"cheerio": "0.20.0",
+ "chrono-node": "1.2.4",
"compression": "1.6.2",
"content-type": "1.0.2",
"core-js": "2.4.1",
diff --git a/test/features/scraping/index.js b/test/features/scraping/index.js
index e8ce812..54d3e1b 100644
--- a/test/features/scraping/index.js
+++ b/test/features/scraping/index.js
@@ -330,7 +330,7 @@
it('doi in url with query parameters - uses crossRef', function() {
return
server.query('http://www.example.com/10.1086/378695?uid=3739832&uid=2&uid=4&uid=3739256&sid=21105503736473').then(function(res)
{
assert.status(res, 200);
- assert.checkCitation(res, 'Salaries, Turnover, and Performance
in the Federal Criminal Justice System*');
+ assert.checkCitation(res, 'Salaries, Turnover, and Performance
in the Federal Criminal Justice System');
assert.deepEqual(res.body[0].DOI, '10.1086/378695');
assert.deepEqual(res.body[0].author.length, 1);
});
diff --git a/test/features/scraping/lang.js b/test/features/scraping/lang.js
index dadb952..f07a8cf 100644
--- a/test/features/scraping/lang.js
+++ b/test/features/scraping/lang.js
@@ -26,7 +26,7 @@
it('german twitter', function() {
return server.query('http://twitter.com', 'mediawiki',
'de').then(function(res) {
assert.status(res, 200);
- assert.checkCitation(res, 'Twitter - Sieh Dir an, was gerade
los ist');
+ assert.checkCitation(res, 'Twitter. Alles, was gerade los
ist.');
assert.deepEqual(!!res.body[0].accessDate, true, 'No
accessDate present');
});
});
diff --git a/test/features/unit/exporter.js b/test/features/unit/exporter.js
index 4899e8d..6b6d0a8 100644
--- a/test/features/unit/exporter.js
+++ b/test/features/unit/exporter.js
@@ -67,12 +67,55 @@
assert.deepEqual(result, expected);
});
- it('ISO format in different TZ', function() {
- date = '2013-04-02T20:00:03-07:00';
- expected = {date: '2013-04-03'};
+ it('Normal date', function() {
+ date = 'May 8 2010';
+ expected = {date: '2010-05-08'};
result = exporter.fixDate({date:date});
assert.deepEqual(result, expected);
});
+
+ it('Normal date with ordinal indicator', function() {
+ date = 'May 8th, 2010';
+ expected = {date: '2010-05-08'};
+ result = exporter.fixDate({date:date});
+ assert.deepEqual(result, expected);
+ });
+
+ it('Badly sets normal date with ordinal number', function() {
+ date = 'May eighth, 2010';
+ expected = {date: '2010-05-01'}; // Wrong sets to May 1st instead
of May 8th
+ result = exporter.fixDate({date:date});
+ assert.deepEqual(result, expected);
+ });
+
+ it('Date on the fence: ISO with - notation', function() {
+ date = '2013-04-02T20:00:03-07:00';
+ expected = {date: '2013-04-02'};
+ result = exporter.fixDate({date:date});
+ assert.deepEqual(result, expected);
+ });
+
+ it('Date on the fence; ISO with + notation', function() {
+ date = '2016-03-08T01:16:07+02:00';
+ expected = {date: '2016-03-08'};
+ result = exporter.fixDate({date:date});
+ assert.deepEqual(result, expected);
+ });
+
+ it('Date on the fence; toString output', function() {
+ date = 'Sat May 08 2010 00:16:07 GMT+0100 (BST)';
+ expected = {date: '2010-05-08'};
+ result = exporter.fixDate({date:date});
+ assert.deepEqual(result, expected);
+ });
+
+ it('Date on the fence; ISO with Z notation', function() {
+ date = '2010-05-08T00:16:00.060Z';
+ expected = {date: '2010-05-08'};
+ result = exporter.fixDate({date:date});
+ assert.deepEqual(result, expected);
+ });
+
});
describe('fixISBN function: ', function() {
--
To view, visit https://gerrit.wikimedia.org/r/309599
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I44134fca827ed5c387f6f56b6670f25f6becb97c
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/citoid
Gerrit-Branch: master
Gerrit-Owner: Mvolz <[email protected]>
Gerrit-Reviewer: Mobrovac <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits